/// <summary>
/// Builds a <c>BaseAttach</c> record for every hyperlink found in <paramref name="htmltxt"/>
/// and persists the records that could be created.
/// </summary>
/// <param name="info">
/// Project the attachments belong to. Its <c>PrjName</c> is used when a link has no text and
/// its <c>Id</c> is the source id when <paramref name="isUpdate"/> is false.
/// </param>
/// <param name="htmltxt">HTML scanned for &lt;a&gt; tags; ignored unless it contains "http".</param>
/// <param name="result">
/// Source id used when <paramref name="isUpdate"/> is true; existing files and rows stored
/// under it are removed before the new attachments are saved.
/// </param>
/// <param name="isUpdate">True to replace the attachments stored under <paramref name="result"/>.</param>
private void SaveAttach(BidProject info, string htmltxt, string result, bool isUpdate)
{
    // No markup (e.g. a failed download) or no absolute link: nothing to attach.
    if (string.IsNullOrEmpty(htmltxt) || !htmltxt.Contains("http"))
    {
        return;
    }

    List<BaseAttach> list = new List<BaseAttach>();
    Parser parser = new Parser(new Lexer(htmltxt));
    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
    if (aNode != null)
    {
        for (int j = 0; j < aNode.Count; j++)
        {
            // GetATag() is null-checked by other crawlers in this code base, so do it here too.
            ATag aTag = aNode[j].GetATag();
            if (aTag == null || string.IsNullOrEmpty(aTag.Link))
            {
                continue;
            }

            string attachName = aTag.LinkText;
            if (string.IsNullOrWhiteSpace(attachName))
            {
                attachName = info.PrjName;
            }

            // Strip escaped quotes (\") that leak into the href.
            string aurl = aTag.Link.GetReplace("\\\"", "");
            try
            {
                string url = System.Web.HttpUtility.UrlDecode(aurl);
                BaseAttach entity;
                if (isUpdate)
                {
                    entity = ToolHtml.GetBaseAttach(url, attachName, result, "SiteManage\\Files\\Attach\\");
                }
                else
                {
                    entity = ToolHtml.GetBaseAttach(url, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                }

                if (entity != null)
                {
                    list.Add(entity);
                }
            }
            catch
            {
                // Best effort: one link that cannot be turned into an attachment must not block the others.
            }
        }
    }

    if (list.Count == 0)
    {
        return;
    }

    if (isUpdate)
    {
        // NOTE(review): the statement is built by string formatting; switch to a parameterised
        // call if ToolDb offers one and `result` can ever come from untrusted input.
        string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", result);
        ToolFile.Delete(result);
        ToolDb.ExecuteSql(delSql);
    }

    foreach (BaseAttach attach in list)
    {
        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
    }
}
/// <summary>
/// Downloads <paramref name="url"/> (UTF-8), looks for the table with id
/// <c>ctl00_ContentPlaceHolder1_GridView1</c> and stores the attachment links found in it
/// under <paramref name="sourceId"/>, replacing whatever was stored there before.
/// </summary>
/// <param name="url">Page that lists the attachments.</param>
/// <param name="sourceId">Id the attachments are stored under.</param>
protected void SaveAttach(string url, string sourceId)
{
    string htmlAnnex = string.Empty;
    try
    {
        htmlAnnex = this.ToolWebSite.GetHtmlByUrl(url, Encoding.UTF8);
    }
    catch
    {
        // Best effort: an unreachable page simply yields no attachments.
    }

    if (string.IsNullOrEmpty(htmlAnnex))
    {
        return;
    }

    Parser dtparser = new Parser(new Lexer(htmlAnnex));
    NodeList dtList = dtparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_GridView1"), new TagNameFilter("table")));
    if (dtList == null || dtList.Count == 0)
    {
        return;
    }

    TableTag dttable = dtList[0] as TableTag;
    if (dttable == null)
    {
        return;
    }

    // The anchor search does not depend on the row, so run it once instead of once per row.
    NodeList links = dttable.SearchFor(typeof(ATag), true);
    if (links == null)
    {
        return;
    }

    List<BaseAttach> attach = new List<BaseAttach>();

    // Row 0 is skipped (t starts at 1); row t is paired with anchor t - 1. Stop when the
    // anchors run out rather than indexing past the end of the list.
    for (int t = 1; t < dttable.RowCount && t - 1 < links.Count; t++)
    {
        ATag file = links[t - 1] as ATag;
        if (file == null || !file.IsAtagAttach())
        {
            continue;
        }

        try
        {
            string aurl = "http://www.szjsjy.com.cn/" + file.Link.Replace("../", "").Replace("./", "");
            BaseAttach entity = ToolHtml.GetBaseAttach(aurl, file.LinkText, sourceId, "SiteManage\\Files\\Attach\\");
            if (entity != null)
            {
                attach.Add(entity);
            }
        }
        catch
        {
            // Best effort: skip a file that cannot be fetched and keep the rest.
        }
    }

    if (attach.Count > 0)
    {
        // Only replace the stored attachments when at least one new one was obtained.
        string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", sourceId);
        ToolFile.Delete(sourceId);
        ToolDb.ExecuteSql(delSql);
        ToolDb.SaveDatas(attach, string.Empty);
    }
}
/// <summary>
/// Stores one hard-coded notice: the title, HTML body, publisher and release date are fixed
/// values, only the plain-text content is extracted from the page at <c>SiteUrl</c>.
/// After a successful save the notice's PDF attachment is stored as well.
/// </summary>
/// <param name="crawlAll">Not used by this crawler.</param>
/// <returns>Always <c>null</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    string pageUrl = this.SiteUrl;

    // The download is best-effort; on failure the plain-text content simply stays empty.
    string pageHtml = string.Empty;
    try
    {
        pageHtml = ToolHtml.GetHtmlByUrl(pageUrl, Encoding.UTF8).GetJsString();
    }
    catch
    {
    }

    string title = "关于转发深圳市住房和建设局转发《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》的通知";

    // Fixed HTML body stored with the notice.
    string bodyHtml = "<table width='960' background='{root_path}images/xil_jl_05.jpg' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td align='center' background='../../../images/xil_jl_03.jpg' valign='top' style='background-repeat: repeat-x;'><table width='100%' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td width='9%'> </td> <td width='83%' height='25'> </td> <td width='8%'> </td> </tr> <tr> <td> </td> <td valign='top'><table width='100%' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td width='8%' height='25' class='red12a'>题材分类:</td> <td width='42%'><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('catalog1=327')'>通知公告公示</a></td> <td width='8%' class='red12a'>主题分类:</td> <td width='42%'><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('catalog2=479')'>其他</a></td> </tr> <tr> <td height='25' class='red12a'>发文机构:</td> <td><span id='fbjgid' style='display: none;'><script>fbjg('深圳市南山区人民政府 ')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('district=深圳市南山区人民政府')'>深圳市南山区人民政府</a></span></td><script>var wh = ''; wh = wh.replace(/ /ig,''); wh = wh.replace(/ /ig,''); if(wh==''||wh==null||'无'==wh){ document.getElementById('fbjgid').style.display='none'; }</script> <td class='red12a'>来源网站发布日期:</td> <td><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('urltime=2013.08.12')'>2013-08-12</a></td> </tr> <tr> <td height='25' class='red12a'>所属地区:</td> <td><script>ssdq('广东省深圳市 ')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('vreserved3=广东省深圳市')'>广东省深圳市</a>;</td> <td class='red12a'>文 号:</td> <td><script type='text/javascript'> ycwh(); </script></td> </tr> <tr> <td height='25' class='red12a' valign='top' style='padding-top: 8px;'>关 键 词:</td> <td valign='top' style='line-height: 20px; padding-top: 3px;'><script>gjzsj('深圳市;泥头车;货运交通;交通运输;备案;港航;土石方运输;投标资质;考评;异地')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=深圳市')'>深圳市</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=泥头车')'>泥头车</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=货运交通')'>货运交通</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=交通运输')'>交通运输</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=备案')'>备案</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=港航')'>港航</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=土石方运输')'>土石方运输</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=投标资质')'>投标资质</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=考评')'>考评</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=异地')'>异地</a>;</td> <td class='red12a'>公文发布日期:</td> <td><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('urldate=')'></a></td> </tr> </tbody></table></td> <td> </td> </tr> </tbody></table></td> </tr> <tr> <td bgcolor='#ffffff'><img width='943' height='8' src='../../../images/xil_jl_06.jpg'></td> </tr> </tbody></table> <table width='960' bgcolor='#ffffff' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td align='center' valign='top'><table width='830' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td align='center' class='dbiaoti' style='padding: 15px 0px;'>关于转发深圳市住房和建设局转发《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》的通知</td> </tr> </tbody></table> <table width='830' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td><table width='100%' background='../../../images/erj_jl_122_28.jpg' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td width='12'><img width='12' height='34' src='../../../images/erj_jl_121_25.jpg'></td> <td><table width='100%' height='25' align='center' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td class='fff12'>来源:<script>lyjs('深圳市南山区人民政府')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('sitename=深圳市南山区人民政府')'>深圳市南山区人民政府</a>;</td> <td width='80'><a onclick='checkUrl('http://www.szns.gov.cn/publish/main/1/19/tzgg/20130812110509651949516/index.html','关于转发深圳市住房和建设局转发《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》的通知','4032393');' href='#'>原文链接 >></a></td> <td width='80'><a href='/search/htmlflash4Radar?docid=4032393'>网页快照</a> >> </td> </tr> </tbody></table></td> <td width='8'><img width='8' height='34' src='../../../images/erj_jl_123_30.jpg'></td> </tr> </tbody></table></td> </tr> </tbody></table> <table width='830' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td class='zw_link' valign='top' style='padding: 20px 0px 0px;'> <br><br>各有关单位:<br> 现将《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》(深交港货[2013]164号)转发给你们,请遵照执行。目前,共有46家泥头车运输企业已获取我市土石方运输业务投标资质;共有82家异地企业204辆泥头车,分别与12家土石方运输业务投标资质企业达成了备案托管。<br> 特此通知。<br> 联系人:李衍航,电话:83788608。 <br> 附件:深交港货[2013]164号<br> 深圳市住房和建设局<br> 2013年8月9日<br> <br><br><br><br> <script type='text/javascript'> qufj(); </script><a href='./P020131018007991034107.pdf'> 附件:深交港货[2013]164号 </a><br> </td> </tr> </tbody></table> <table width='100%' border='0' cellspacing='0' cellpadding='0'> <tbody><tr> <td> </td> </tr> </tbody></table></td> </tr> </tbody></table>";

    // Plain text comes from the <td> cells carrying the sd_in_09.jpg background.
    string bodyText = string.Empty;
    Parser pageParser = new Parser(new Lexer(pageHtml));
    NodeList contentCells = pageParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("background", "../../../images/sd_in_09.jpg")));
    if (contentCells != null && contentCells.Count > 0)
    {
        string cellHtml = contentCells.AsHtml().Replace("<br>", "\r\n");
        bodyText = cellHtml.ToCtxString().Replace(":\r\n", ":").Replace(">", "");
    }

    // Publisher doubles as the message type for this notice.
    string publisher = "深圳市住房和建设局";
    NotifyInfo notice = ToolDb.GenNotifyInfo(title, "2013-08-09", publisher, publisher, pageUrl, bodyHtml, "广东省", "深圳市工程", string.Empty, bodyText, "通知公告");

    if (!ToolDb.SaveEntity(notice, this.ExistCompareFields, this.ExistsUpdate))
    {
        return null;
    }

    BaseAttach pdf = ToolHtml.GetBaseAttach("http://govinfo.nlc.gov.cn/gdsszfz/xxgk/szsnsqrmzf/201310/P020131018007991034107.pdf", "深交港货[2013]164号", notice.Id);
    if (pdf != null)
    {
        ToolDb.SaveEntity(pdf, string.Empty);
    }

    return null;
}
/// <summary>
/// Crawls the bid-result list served as JSON at <c>SiteUrl</c>. For every entry the legacy
/// detail page is fetched; when that page is empty the entry's own detail URL plus the JSON
/// detail API are used instead and the page's empty placeholder spans are filled from the
/// JSON. Each entry is saved as a <c>BidInfo</c>, followed by its attachments.
/// </summary>
/// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> entries have been processed.</param>
/// <returns>
/// <c>null</c> when the list cannot be downloaded or parsed, or when the <c>MaxCount</c> limit
/// is reached; otherwise the result list, which this crawler never populates.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    this.HttpApi();
    this.ProbeUnitQyInfoApi();

    IList list = new List<BidInfo>();
    int sqlCount = 0;

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (this.MaxCount + 20));
    }
    catch
    {
        return null;
    }

    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    // The JSON payload may be wrapped in other text; keep only the outermost {...}.
    int startIndex = html.IndexOf('{');
    int endIndex = html.LastIndexOf('}');
    if (startIndex < 0 || endIndex < startIndex)
    {
        return null;
    }

    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> smsTypeJson = serializer.DeserializeObject(html) as Dictionary<string, object>;
    if (smsTypeJson == null)
    {
        return null;
    }

    foreach (KeyValuePair<string, object> obj in smsTypeJson)
    {
        if (obj.Key == "total")
        {
            continue;
        }

        object[] array = obj.Value as object[];
        if (array == null)
        {
            continue;
        }

        foreach (object arrValue in array)
        {
            Dictionary<string, object> dic = arrValue as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   endDate = string.Empty, prjMgr = string.Empty, htmlTxt = string.Empty;

            string code = Convert.ToString(dic["bdBH"]);
            string prjName = Convert.ToString(dic["bdName"]);
            string bidType = Convert.ToString(dic["gcLeiXing2"]);
            string beginDate = Convert.ToString(dic["fabuTime2"]);
            try
            {
                bidMoney = Convert.ToString(dic["zhongBiaoJE"]).GetMoney();
            }
            catch
            {
                // Amount is optional in the list; it may still be recovered from the detail data.
            }

            string addUrl = Convert.ToString(dic["detailUrl"]);
            string resultGuid = Convert.ToString(dic["dbZhongBiaoJieGuoGuid"]);

            // e.g. https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=4&id=158df5f1-73a1-440c-a59b-e4ca1464b4e9
            string infoUrl = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=4&id=" + resultGuid;
            try
            {
                htmlTxt = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
            }
            catch
            {
            }

            List<Dictionary<string, string>> dicFile = new List<Dictionary<string, string>>();

            if (string.IsNullOrEmpty(htmlTxt))
            {
                // Legacy page is empty: use the entry's own page as template and the JSON API for values.
                string strHtml = string.Empty;
                string newUrl = "https://www.szjsjy.com.cn:8001/jyw/queryZbgs.do?guid=" + resultGuid + "&ggGuid=bdGuid=";
                infoUrl = addUrl;
                try
                {
                    htmlTxt = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    strHtml = this.ToolWebSite.GetHtmlByUrl(newUrl).GetJsString();
                }
                catch
                {
                }

                if (!string.IsNullOrEmpty(strHtml))
                {
                    string gcBH = string.Empty, gcName = string.Empty, xmBH = string.Empty, xmName = string.Empty,
                           zbgsStartTime = string.Empty, zbgsEndTime = string.Empty, zbRName = string.Empty,
                           zbdlJG = string.Empty, zbFangShi = string.Empty, bdName = string.Empty,
                           tbrName = string.Empty, zhongBiaoJE = string.Empty, zhongBiaoGQ = string.Empty,
                           xiangMuJiLi = string.Empty, ziGeDengJi = string.Empty, ziGeZhengShu = string.Empty,
                           isZanDingJinE = string.Empty, ztbFileGroupGuid = string.Empty;
                    try
                    {
                        // Tendering-method codes (numeric and symbolic) mapped to their display names.
                        Dictionary<string, string> zbfs = new Dictionary<string, string>
                        {
                            { "2", "邀请招标" },
                            { "1", "公开招标" },
                            { "YuXuanZhaoBiaoZGC", "预选招标子工程" },
                            { "GongKaiZhaoBiao", "公开招标" },
                            { "5", "预选招标子工程" },
                            { "4", "单一来源" },
                            { "DanYiLaiYuan", "单一来源" },
                            { "YaoQingZhaoBiao", "邀请招标" }
                        };

                        JavaScriptSerializer newSerializer = new JavaScriptSerializer();
                        Dictionary<string, object> newTypeJson = (Dictionary<string, object>)newSerializer.DeserializeObject(strHtml);
                        Dictionary<string, object> bd = newTypeJson["bd"] as Dictionary<string, object>;
                        Dictionary<string, object> gc = bd["gc"] as Dictionary<string, object>;
                        ztbFileGroupGuid = Convert.ToString(newTypeJson["ztbFileGroupGuid"]);
                        gcBH = Convert.ToString(gc["gcBH"]);
                        gcName = Convert.ToString(gc["gcName"]);

                        Dictionary<string, object> xm = bd["xm"] as Dictionary<string, object>;
                        if (xm != null)
                        {
                            xmBH = Convert.ToString(xm["xm_BH"]);
                            xmName = Convert.ToString(xm["xm_Name"]);
                        }

                        object startTime = newTypeJson["zbgsStartTime"];
                        if (startTime != null)
                        {
                            zbgsStartTime = ToolHtml.GetDateTimeByLong(Convert.ToInt64(startTime)).ToString("yyyy-MM-dd HH:mm");
                        }

                        object endTime = newTypeJson["zbgsEndTime"];
                        if (endTime != null)
                        {
                            endDate = zbgsEndTime = ToolHtml.GetDateTimeByLong(Convert.ToInt64(endTime)).ToString("yyyy-MM-dd HH:mm");
                        }

                        buildUnit = zbRName = Convert.ToString(gc["zbRName"]);
                        zbdlJG = Convert.ToString(newTypeJson["zbdlJG"]);

                        zbFangShi = Convert.ToString(gc["zbFangShi"]);
                        string zbFangShiName;
                        if (!string.IsNullOrEmpty(zbFangShi) && zbfs.TryGetValue(zbFangShi, out zbFangShiName))
                        {
                            // An unknown code keeps its raw value instead of aborting the remaining fields.
                            zbFangShi = zbFangShiName;
                        }

                        bdName = Convert.ToString(bd["bdName"]);
                        bidUnit = tbrName = Convert.ToString(newTypeJson["tbrName"]);

                        zhongBiaoJE = Convert.ToString(newTypeJson["zhongBiaoJE"]);
                        if (!string.IsNullOrEmpty(zhongBiaoJE))
                        {
                            try
                            {
                                bidMoney = zhongBiaoJE = (decimal.Parse(zhongBiaoJE) / 1000000).ToString();
                            }
                            catch
                            {
                            }
                        }
                        else
                        {
                            try
                            {
                                zhongBiaoJE = Convert.ToString(newTypeJson["tongYongZhongBiaoJia"]);
                                bidMoney = (zhongBiaoJE + "\r\n").GetMoneyRegex(new string[] { "人民币" });
                            }
                            catch
                            {
                            }
                        }

                        zhongBiaoGQ = Convert.ToString(newTypeJson["zhongBiaoGQ"]);
                        prjMgr = xiangMuJiLi = Convert.ToString(newTypeJson["xiangMuJiLi"]);
                        ziGeDengJi = Convert.ToString(newTypeJson["ziGeDengJi"]);
                        ziGeZhengShu = Convert.ToString(newTypeJson["ziGeZhengShu"]);
                        isZanDingJinE = Convert.ToString(newTypeJson["isZanDingJinE"]);
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex);
                    }

                    Parser parser = new Parser(new Lexer(htmlTxt));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail_contect")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        htmlTxt = dtlNode.AsHtml();
                        htmlTxt = FillEmptySpan(htmlTxt, "gcBH", gcBH);
                        htmlTxt = FillEmptySpan(htmlTxt, "gcName", gcName);
                        htmlTxt = FillEmptySpan(htmlTxt, "xmBH", xmBH);
                        htmlTxt = FillEmptySpan(htmlTxt, "xmName", xmName);
                        htmlTxt = FillEmptySpan(htmlTxt, "zbgsStartTime", zbgsStartTime);
                        htmlTxt = FillEmptySpan(htmlTxt, "zbgsEndTime", zbgsEndTime);
                        htmlTxt = FillEmptySpan(htmlTxt, "zbRName", zbRName);
                        htmlTxt = FillEmptySpan(htmlTxt, "zbdlJG", zbdlJG);
                        htmlTxt = FillEmptySpan(htmlTxt, "zbFangShi", zbFangShi);
                        htmlTxt = FillEmptySpan(htmlTxt, "bdName", bdName);
                        htmlTxt = FillEmptySpan(htmlTxt, "tbrName", tbrName);
                        htmlTxt = FillEmptySpan(htmlTxt, "zhongBiaoJE", zhongBiaoJE);
                        htmlTxt = FillEmptySpan(htmlTxt, "zhongBiaoGQ", zhongBiaoGQ);
                        htmlTxt = FillEmptySpan(htmlTxt, "xiangMuJiLi", xiangMuJiLi);
                        htmlTxt = FillEmptySpan(htmlTxt, "ziGeDengJi", ziGeDengJi);
                        htmlTxt = FillEmptySpan(htmlTxt, "ziGeZhengShu", ziGeZhengShu);

                        // The yes/no choice must be evaluated before concatenation; without the
                        // explicit grouping "+" binds first and the span always became "否</span>".
                        string zanDingText = string.Equals(isZanDingJinE, "true", StringComparison.OrdinalIgnoreCase) ? "是" : "否";
                        htmlTxt = FillEmptySpan(htmlTxt, "isZanDingJinE", zanDingText);
                    }

                    string fileUrl = "https://www.szjsjy.com.cn:8001/jyw/filegroup/queryByGroupGuidZS.do?groupGuid=" + ztbFileGroupGuid;
                    try
                    {
                        string fileHtml = this.ToolWebSite.GetHtmlByUrl(fileUrl);
                        JavaScriptSerializer fileSerializer = new JavaScriptSerializer();
                        Dictionary<string, object> fileTypeJson = fileSerializer.DeserializeObject(fileHtml) as Dictionary<string, object>;
                        if (fileTypeJson != null)
                        {
                            foreach (KeyValuePair<string, object> fileObj in fileTypeJson)
                            {
                                // Skip non-array members instead of losing every file to one failed cast.
                                object[] fileArray = fileObj.Value as object[];
                                if (fileArray == null)
                                {
                                    continue;
                                }

                                foreach (object fileValue in fileArray)
                                {
                                    Dictionary<string, object> tempDic = fileValue as Dictionary<string, object>;
                                    if (tempDic == null)
                                    {
                                        continue;
                                    }

                                    Dictionary<string, string> file = new Dictionary<string, string>();
                                    file.Add("Name", Convert.ToString(tempDic["attachName"]));
                                    file.Add("Url", Convert.ToString("https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + tempDic["attachGuid"]));
                                    dicFile.Add(file);
                                }
                            }
                        }
                    }
                    catch
                    {
                        // Best effort: the announcement is still saved when the file list is unavailable.
                    }
                }
            }

            string bidCtx = htmlTxt.GetReplace("<br />,<br/>,</ br>,</br>", "\r\n").ToCtxString() + "\r\n";

            // Nothing came from structured data: fall back to pattern extraction on the plain text.
            if (string.IsNullOrEmpty(buildUnit) && string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetBidRegex();
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = bidCtx.Replace(" ", "").GetBidRegex();
                }
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = bidCtx.GetRegex("中 标 人");
                }

                string money = bidCtx.GetMoneyRegex();
                if (!string.IsNullOrEmpty(money))
                {
                    bidMoney = money;
                }
                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                {
                    // Same extraction as above; reuse its result rather than running it again.
                    bidMoney = money;
                }
                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                {
                    bidMoney = bidCtx.Replace(" ", "").GetMoneyRegex();
                }
                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                {
                    bidMoney = bidCtx.GetRegex("中 标 价");
                }

                prjMgr = bidCtx.GetMgrRegex();
                if (string.IsNullOrEmpty(prjMgr))
                {
                    prjMgr = bidCtx.Replace(" ", "").GetMgrRegex();
                }
                if (string.IsNullOrEmpty(prjMgr))
                {
                    prjMgr = bidCtx.GetRegex("项 目 总 监");
                }

                buildUnit = bidCtx.GetBuildRegex();
                if (string.IsNullOrEmpty(buildUnit))
                {
                    buildUnit = bidCtx.Replace(" ", "").GetBuildRegex();
                }
                if (string.IsNullOrEmpty(buildUnit))
                {
                    buildUnit = bidCtx.GetRegex("建 设 单 位");
                }
            }

            string specType = "建设工程";
            string msgType = "深圳市建设工程交易中心";

            // Guard the length checks below against extraction helpers handing back null.
            prjMgr = prjMgr ?? string.Empty;
            bidUnit = bidUnit ?? string.Empty;

            // Byte-length limits (50 / 150) are applied before saving; oversized values are
            // trimmed to the first ';' part or cleared.
            if (Encoding.Default.GetByteCount(prjMgr) > 50)
            {
                prjMgr = "";
            }
            if (Encoding.Default.GetByteCount(bidUnit) > 150)
            {
                string[] bidUnits = bidUnit.Split(';');
                bidUnit = bidUnits[0];
            }
            if (Encoding.Default.GetByteCount(bidUnit) > 150)
            {
                bidUnit = "";
            }
            if (prjMgr.Contains("----"))
            {
                prjMgr = "";
            }

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳市工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, string.Empty, bidMoney, addUrl, prjMgr, htmlTxt);
            sqlCount++;
            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                foreach (Dictionary<string, string> file in dicFile)
                {
                    try
                    {
                        BaseAttach item = ToolHtml.GetBaseAttach(file["Url"], file["Name"], info.Id, "SiteManage\\Files\\InviteAttach\\");
                        if (item != null)
                        {
                            ToolDb.SaveEntity(item, "SourceID,AttachServerPath");
                        }
                    }
                    catch
                    {
                        // Best effort: a failed download must not stop the remaining attachments.
                    }
                }
            }

            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
        }
    }

    return list;
}

/// <summary>
/// Replaces the empty placeholder <c>&lt;span id=ID&gt;&lt;/span&gt;</c> in
/// <paramref name="html"/> with the same span wrapping <paramref name="value"/>.
/// </summary>
private static string FillEmptySpan(string html, string spanId, string value)
{
    return html.GetReplace("<span id=" + spanId + "></span>", "<span id=" + spanId + ">" + value + "</span>");
}

/// <summary>
/// Sends the signed POST to the getUnitQyInfos JSON endpoint that this crawler issues before
/// crawling. Failures are logged and never abort the crawl.
/// </summary>
/// <remarks>
/// NOTE(review): the response is not consumed. The statements that used to follow this
/// request cast the parsed payload to types it can never have, so they threw on every run
/// and kept the crawl from starting. Confirm whether the request is still needed at all.
/// </remarks>
private void ProbeUnitQyInfoApi()
{
    try
    {
        string url = "http://web.zjj.sz.gov.cn/HouseOutService/queryFwRentms/getUnitQyInfos.json";
        string publicKey = "bnNfZGF0YTpLSUlmMndLVWJ1RmVyVEhRZWh5WTFyNzNlVEM4VmVTb3p2eFBDanN2VVJRWnExR20xdVduVk1FQnlyK0ZrMEdhcVRGRzFVUUw1dTBDNEpxRWNRSVRra3NOYWgxcFVldnJCbnpTcDJaUnU3THpyNTZsUmhzd09NdHNiZHYxVCtJbGdHdzBEcUZXczJIVVYzZkw0NWFnbldqemt3MHJpVlJ2cEs5MFFiOHBMb1E9";

        // Request signature: MD5 over the concatenated fields, followed by fixed character swaps.
        string key = string.Format("{0}{1}{2}{3}", "e02d02ec17a14446a861bbad068c40ef", "440305", "", "1990-01-01");
        string keys = DESEncrypt.GenerateMD5(key);
        keys = keys.Replace("o", "p");
        keys = keys.Replace("i", "t");
        keys = keys.Replace("l", "n");
        keys = keys.Replace("1", "7");
        keys = keys.Replace("0", "8");

        Dictionary<string, string> dic1 = new Dictionary<string, string>();
        dic1.Add("key", keys);
        dic1.Add("belongto", "440305");
        dic1.Add("quart", "");
        dic1.Add("page", "60");
        dic1.Add("timestamp", "1990-01-01");
        string jsonStr = JsonConvert.SerializeObject(dic1);

        using (IWebHttpClient httpClient = new WebHttpClient())
        {
            httpClient.PostSync(new Uri(url), jsonStr, publicKey);
        }
    }
    catch (Exception ex)
    {
        Logger.Error(ex);
    }
}
/// <summary>
/// Crawls the paged guide list at <c>SiteUrl</c> (table with class <c>font9grey1</c>), loads
/// each entry's detail page, saves the <c>contentDiv</c> block as a <c>NotifyInfo</c> and then
/// stores the images and attachment links found in that block.
/// </summary>
/// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> notices have been saved.</param>
/// <returns>Always <c>null</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string siteRoot = "http://www.gzzb.gd.cn";

    int pageInt = 1, sqlCount = 0;
    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
    }
    catch (Exception)
    {
        return null;
    }

    // Page count: taken from the "page=" value in the href of the last link inside the pager cells.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "font9green2")));
    if (pageList != null && pageList.Count > 0)
    {
        try
        {
            string temp = pageList.GetATag(pageList.Count - 1).Link.Replace("&", "kdxx") + "kdxx";
            temp = temp.GetRegexBegEnd("page=", "kdxx").Replace("&", "");
            pageInt = Convert.ToInt32(temp);
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + i.ToString(), Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "font9grey1")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        TableTag table = nodeList[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];

            // Title is read from column 1 and the date from column 2; skip rows that are too short.
            if (tr == null || tr.ColumnCount < 3)
            {
                continue;
            }

            string infoType = "办事指南";
            string headName = tr.Columns[1].ToNodePlainString();
            string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
            string infoUrl = siteRoot + tr.Columns[1].GetATagHref();

            string htldtl;
            try
            {
                htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htldtl));
            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "contentDiv")));
            if (dtlList == null || dtlList.Count == 0)
            {
                continue;
            }

            string ctxHtml = dtlList.AsHtml();
            string infoCtx = ctxHtml.ToCtxString();
            string msgType = MsgTypeCosnt.GuangZhouMsgType;
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, string.Empty, msgType, infoUrl, ctxHtml, "广东省", "广州市区", string.Empty, infoCtx, infoType);

            sqlCount++;
            if (ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                // Images embedded in the notice body.
                parser = new Parser(new Lexer(ctxHtml));
                NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                if (imgList != null)
                {
                    for (int img = 0; img < imgList.Count; img++)
                    {
                        ImageTag imgTag = imgList[img] as ImageTag;
                        if (imgTag == null)
                        {
                            continue;
                        }

                        try
                        {
                            string src = imgTag.GetAttribute("src");
                            string imgUrl = src.Contains("http") ? src : siteRoot + src;
                            BaseAttach obj = ToolHtml.GetBaseAttach(imgUrl, headName, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: one broken image must not stop the remaining attachments.
                        }
                    }
                }

                // Links in the notice body that IsAtagAttach() accepts as attachments.
                parser = new Parser(new Lexer(ctxHtml));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null)
                {
                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag aTag = aNode[a] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }

                        try
                        {
                            string fileUrl = aTag.Link.Contains("http") ? aTag.Link : siteRoot + aTag.Link;
                            BaseAttach obj = ToolHtml.GetBaseAttach(fileUrl, aTag.LinkText, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: skip a file that cannot be fetched.
                        }
                    }
                }
            }

            // Stop only after the notice that reached the limit has been saved. Checking before
            // the save dropped that notice, so only MaxCount - 1 items were ever stored.
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
        }
    }

    return null;
}
/// <summary>
/// Walks the paged notice grid (table <c>GridView1</c>) at <c>SiteUrl</c>. Pages after the
/// first are requested by posting the ASP.NET view-state fields back to the same URL. Every
/// grid row's detail page is downloaded, stored as a <c>NotifyInfo</c>, and the links inside
/// the stored body (except those whose text contains "返回") are saved as attachments.
/// </summary>
/// <param name="crawlAll">When false, the crawl ends once <c>MaxCount</c> notices have been processed.</param>
/// <returns>Always <c>null</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    string listHtml;
    try
    {
        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return null;
    }

    // Page count: cell count of the first row of the table nested inside GridView1, plus one.
    int pageCount = 1;
    Parser pagerParser = new Parser(new Lexer(listHtml));
    NodeList pagerTables = pagerParser.ExtractAllNodesThatMatch(
        new AndFilter(
            new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")), true),
            new TagNameFilter("table")));
    if (pagerTables != null && pagerTables.Count > 0)
    {
        try
        {
            TableTag pager = pagerTables[0] as TableTag;
            pageCount = pager.Rows[0].ColumnCount + 1;
        }
        catch
        {
            pageCount = 1;
        }
    }

    int processed = 0;
    for (int page = 1; page <= pageCount; page++)
    {
        if (page > 1)
        {
            try
            {
                // Post back the hidden fields of the page currently held to request the next one.
                string viewState = this.ToolWebSite.GetAspNetViewState(listHtml);
                string eventValidation = this.ToolWebSite.GetAspNetEventValidation(listHtml);
                string[] fieldNames = new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "sel", "beginDate", "endDate", "infotitle" };
                string[] fieldValues = new string[] { "GridView1", "Page$" + page.ToString(), viewState, "", eventValidation, "1", "", "", "" };
                NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        Parser gridParser = new Parser(new Lexer(listHtml));
        NodeList grids = gridParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
        if (grids == null || grids.Count == 0)
        {
            continue;
        }

        TableTag grid = grids[0] as TableTag;

        // First and last grid rows are not visited.
        int lastRow = grid.RowCount - 1;
        for (int rowIndex = 1; rowIndex < lastRow; rowIndex++)
        {
            TableRow row = grid.Rows[rowIndex];

            string title = row.Columns[1].ToNodePlainString();
            string published = row.Columns[3].ToPlainTextString().GetDateRegex();
            if (string.IsNullOrEmpty(published))
            {
                published = row.Columns[3].ToPlainTextString().GetDateRegex("yyyy/MM/dd");
            }
            string source = row.Columns[2].ToNodePlainString();
            string category = "通知公告";
            // e.g. http://www.szjsjy.com.cn/Notify/InformContent.aspx?id=117750
            string detailUrl = "http://www.szjsjy.com.cn/Notify/" + row.Columns[1].GetATagHref();

            string detailHtml;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            Parser detailParser = new Parser(new Lexer(detailHtml));
            NodeList bodyTables = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("background", "../img/A-3_17.gif")));
            if (bodyTables == null || bodyTables.Count == 0)
            {
                continue;
            }

            string bodyHtml = bodyTables.AsHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");

            // Plain text: strip blanks, collapse tabs, drop remaining tags, then squeeze blank lines.
            string bodyText = bodyTables.AsString();
            bodyText = bodyText.Replace(" ", "").Replace(" ", "");
            bodyText = bodyText.Replace("\t\t", "\t").Replace("\t\t", "\t");
            bodyText = Regex.Replace(bodyText, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            bodyText = bodyText.Replace(" ", "").Replace("\t", "");
            bodyText = bodyText.Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

            string messageType = MsgTypeCosnt.ShenZhenMsgType;
            source = source.Replace(" ", "");
            NotifyInfo notice = ToolDb.GenNotifyInfo(title, published, source, messageType, detailUrl, bodyHtml, "广东省", "深圳市工程", string.Empty, bodyText, category);

            if (!crawlAll && processed >= this.MaxCount)
            {
                return null;
            }

            processed++;
            if (!ToolDb.SaveEntity(notice, this.ExistCompareFields))
            {
                continue;
            }

            Parser linkParser = new Parser(new Lexer(bodyHtml));
            NodeList anchors = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors == null || anchors.Count == 0)
            {
                continue;
            }

            for (int k = 0; k < anchors.Count; k++)
            {
                ATag link = anchors[k].GetATag();
                if (link == null || link.LinkText.Contains("返回"))
                {
                    continue;
                }

                try
                {
                    BaseAttach file = ToolHtml.GetBaseAttach("http://www.szjsjy.com.cn/" + link.Link.Replace("../", ""), link.LinkText, notice.Id);
                    if (file != null)
                    {
                        ToolDb.SaveEntity(file, string.Empty);
                    }
                }
                catch
                {
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Crawls the paged list at <c>SiteUrl</c> (pages addressed with "&amp;pageNum=N"). Each row of the
/// second table with width "95%" is turned into a <c>NotifyInfo</c> of type "政策法规", saved through
/// <c>ToolDb.SaveEntity</c>, and the attachment links of its detail page are stored.
/// </summary>
/// <param name="crawlAll">When false, the method returns once <c>MaxCount</c> entries have been processed.</param>
/// <returns>Always <c>null</c>; records are persisted as a side effect.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    int pageCount = 1, processed = 0;
    string listHtml = string.Empty;

    try
    {
        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
    }
    catch (Exception)
    {
        return null;
    }

    // Read the page count from the pager cell text.
    Parser parser = new Parser(new Lexer(listHtml));
    NodeList pagerCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "28")));
    if (pagerCells != null && pagerCells.Count > 0)
    {
        try
        {
            string pageText = pagerCells.AsString().GetRegexBegEnd(",共", "页");
            pageCount = Convert.ToInt32(pageText);
        }
        catch
        {
            pageCount = 1;
        }
    }

    for (int page = 1; page <= pageCount; page++)
    {
        if (page > 1)
        {
            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&pageNum=" + page.ToString(), Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(listHtml));
        NodeList listTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "95%")));
        // The listing is the second matching table; skip the page when it is absent.
        if (listTables == null || listTables.Count <= 1)
        {
            continue;
        }

        TableTag listTable = listTables[1] as TableTag;
        for (int rowIndex = 0; rowIndex < listTable.RowCount; rowIndex++)
        {
            TableRow row = listTable.Rows[rowIndex];

            string infoScorce = string.Empty;
            string infoType = "政策法规";
            string headName = row.Columns[0].ToNodePlainString();
            string releaseTime = row.Columns[1].ToPlainTextString().GetDateRegex();
            // The detail path is taken from the row's onclick handler.
            string infoUrl = "http://market.meizhou.gov.cn" + row.Columns[0].GetATagValue("onclick").GetRegexBegEnd(",'", "',");

            string detailHtml = string.Empty;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList contentTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
            if (contentTables == null || contentTables.Count <= 0)
            {
                continue;
            }

            // Prefer the second centred table; otherwise use the whole match.
            string ctxHtml = contentTables.Count > 1 ? contentTables[1].ToHtml() : contentTables.ToHtml();
            string infoCtx = ctxHtml.ToCtxString().Replace(">", "");
            string msgType = MsgTypeCosnt.MeiZhouMsgType;
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "梅州市区", string.Empty, infoCtx, infoType);

            if (!crawlAll && processed >= this.MaxCount)
            {
                return null;
            }
            processed++;

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                continue;
            }

            // Attachments are looked up in the full detail page, not only in the content table.
            parser = new Parser(new Lexer(detailHtml));
            NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors == null || anchors.Count <= 0)
            {
                continue;
            }

            for (int k = 0; k < anchors.Count; k++)
            {
                ATag anchor = anchors[k] as ATag;
                if (!anchor.IsAtagAttach())
                {
                    continue;
                }

                try
                {
                    BaseAttach attach = ToolHtml.GetBaseAttach("http://market.meizhou.gov.cn" + anchor.Link, anchor.LinkText, info.Id);
                    if (attach != null)
                    {
                        ToolDb.SaveEntity(attach, string.Empty);
                    }
                }
                catch
                {
                    // Best effort: a failed attachment does not stop the crawl.
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Downloads the JSON tender listing at <c>SiteUrl + MaxCount</c> and, for every entry of its
/// "rows" array, fetches the announcement detail HTML, builds an <c>InviteInfo</c>, saves it through
/// <c>ToolDb.SaveEntity</c> and downloads the absolute ("http") attachment links found in that HTML.
/// </summary>
/// <param name="crawlAll">When false, processing stops once <c>MaxCount</c> rows have been handled.</param>
/// <returns>
/// <c>null</c> when the listing cannot be downloaded or does not contain a usable JSON "rows" array;
/// otherwise the result list (nothing is added to it here — records are persisted directly).
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();
    int sqlCount = 0;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
    }
    catch
    {
        return null;
    }

    // Cut the JSON object out of the response. A response without braces used to make
    // Substring throw; it is now treated like a failed download.
    if (string.IsNullOrEmpty(html))
    {
        return null;
    }
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return null;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    object[] rows = null;
    try
    {
        JavaScriptSerializer serializer = new JavaScriptSerializer();
        Dictionary<string, object> payload = serializer.DeserializeObject(html) as Dictionary<string, object>;
        object rowsValue;
        if (payload != null && payload.TryGetValue("rows", out rowsValue))
        {
            rows = rowsValue as object[];
        }
    }
    catch
    {
        return null;
    }
    if (rows == null)
    {
        return null;
    }

    foreach (object rowValue in rows)
    {
        Dictionary<string, object> dic = rowValue as Dictionary<string, object>;
        if (dic == null)
        {
            continue;
        }

        string prjAddress = string.Empty, buildUnit = string.Empty, inviteCtx = string.Empty,
               endDate = string.Empty, remark = string.Empty, otherType = string.Empty,
               HtmlTxt = string.Empty;

        string code = Convert.ToString(dic["gcBH"]);
        string prjName = Convert.ToString(dic["gcName"]);
        string beginDate = Convert.ToString(dic["ggStartTime2"]).GetDateRegex();
        string end = Convert.ToString(dic["ggEndTime"]);
        try
        {
            endDate = ToolHtml.GetDateTimeByLong(Convert.ToInt64(end)).ToString();
        }
        catch
        {
            // The end time is optional; leave it empty when it cannot be converted.
        }
        string inviteType = Convert.ToString(dic["gcLeiXing2"]);
        string InfoUrl = Convert.ToString(dic["detailUrl"]);

        try
        {
            string oldDetailUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=1&id=" + dic["gcGuid"];
            try
            {
                HtmlTxt = this.ToolWebSite.GetHtmlByUrl(oldDetailUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
            }
            catch
            {
                // Fall through to the announcement endpoint below.
            }

            // Only hit the second endpoint when the first one produced nothing; previously the
            // first URL was downloaded a second time whenever it had already succeeded.
            if (string.IsNullOrWhiteSpace(HtmlTxt))
            {
                string announcementUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/showGongGao.do?ggGuid=" + dic["ggGuid"];
                HtmlTxt = this.ToolWebSite.GetHtmlByUrl(announcementUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
            }
            HtmlTxt = HtmlTxt.GetReplace("},{,maoDian:,html:");

            // Last resort: retry the first endpoint when the cleaned-up text is blank.
            if (string.IsNullOrWhiteSpace(HtmlTxt))
            {
                HtmlTxt = this.ToolWebSite.GetHtmlByUrl(oldDetailUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
            }
        }
        catch
        {
            continue;
        }

        inviteCtx = HtmlTxt.Replace("</span>", "\r\n").Replace("<br />", "\r\n").Replace("<BR>", "\r\n").Replace("<br/>", "\r\n").ToCtxString();
        prjAddress = inviteCtx.GetAddressRegex();
        buildUnit = inviteCtx.GetBuildRegex();
        if (string.IsNullOrEmpty(code))
        {
            code = inviteCtx.GetCodeRegex();
        }
        string msgType = "深圳市建设工程交易中心宝安分中心";
        string specType = "建设工程";

        InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
        sqlCount++;

        if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
        {
            Parser parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k] as ATag;
                    if (a == null || !a.IsAtagAttach())
                    {
                        continue;
                    }

                    // Only absolute links are downloaded; relative ones are ignored.
                    if (a.Link.ToLower().Contains("http"))
                    {
                        string link = a.Link.Replace("\\", "");
                        BaseAttach attach = null;
                        try
                        {
                            attach = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                        }
                        catch
                        {
                            // Best effort: a failed attachment does not stop the crawl.
                        }
                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, "");
                        }
                    }
                }
            }
        }

        if (!crawlAll && sqlCount >= this.MaxCount)
        {
            return list;
        }
    }

    return list;
}
/// <summary>
/// Reads the embedded &lt;xml&gt; index of the page at <c>SiteUrl</c>, downloads every listed
/// article and classifies it by title: titles containing "中标", "成交" or "结果" become a
/// <c>BidInfo</c>, all others an <c>InviteInfo</c>. The created entities are appended to the
/// returned list; link attachments are queued on <c>base.AttachList</c>, and the first image of an
/// article is stored immediately when no record with the same InfoUrl exists yet.
/// </summary>
/// <param name="crawlAll">When false, the method returns as soon as the list holds <c>MaxCount</c> entries.</param>
/// <returns>The collected <c>BidInfo</c>/<c>InviteInfo</c> entities (empty when the index cannot be read).</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    // Without an <xml>...</xml> island Substring would throw; treat that like a failed download.
    if (string.IsNullOrEmpty(html))
    {
        return list;
    }
    int startIndex = html.IndexOf("<xml");
    int endIndex = html.IndexOf("</xml>");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }

    // Rename the custom XML elements to HTML tag names so the HTML parser can filter on them.
    string xmlstr = html.Substring(startIndex, endIndex - startIndex).ToLower().GetReplace("infourl", "span").GetReplace("info", "div").GetReplace("publishedtime", "p");
    Parser parser = new Parser(new Lexer(xmlstr));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("div"));
    if (pageNode == null || pageNode.Count == 0)
    {
        return list;
    }

    for (int i = 0; i < pageNode.Count; i++)
    {
        string prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;

        parser = new Parser(new Lexer(pageNode[i].ToHtml()));
        NodeList dateNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("p"));
        parser.Reset();
        NodeList urlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("span"));
        parser.Reset();
        NodeList prjNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("title"));

        // An index entry lacking its date, url or title is skipped instead of aborting the crawl.
        if (dateNode == null || dateNode.Count == 0 ||
            urlNode == null || urlNode.Count == 0 ||
            prjNode == null || prjNode.Count == 0)
        {
            continue;
        }

        beginDate = dateNode[0].ToPlainTextString().GetDateRegex();
        InfoUrl = "http://jingtai.by.gov.cn/publicfiles/business/htmlfiles/" + urlNode[0].ToPlainTextString();
        prjName = prjNode[0].ToNodePlainString();

        string htmldtl = string.Empty;
        try
        {
            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
        }
        catch
        {
            continue;
        }

        parser = new Parser(new Lexer(htmldtl));
        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoomcon")));
        if (dtl == null || dtl.Count == 0)
        {
            continue;
        }

        HtmlTxt = dtl.AsHtml();

        if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果"))
        {
            string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty,
                   endDate = string.Empty, bidType = string.Empty, specType = string.Empty, msgType = string.Empty,
                   bidCtx = string.Empty, prjMgr = string.Empty, otherType = string.Empty;

            // Make the first image's src absolute inside the stored HTML.
            Parser imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
            NodeList imgNode = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
            string src = string.Empty;
            if (imgNode != null && imgNode.Count > 0)
            {
                string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                src = "http://jingtai.by.gov.cn/" + imgUrl;
                HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
            }

            bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
            code = bidCtx.GetCodeRegex().GetCodeDel();

            // Trim the extracted unit names at "地址" and just after "公司".
            buildUnit = bidCtx.GetBuildRegex();
            if (buildUnit.Contains("地址"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
            }
            if (buildUnit.Contains("公司"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
            }

            bidUnit = bidCtx.GetBidRegex();
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegexBegEnd("招标人确定", "单位");
            }
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegexBegEnd("确认", "为中标");
            }
            if (bidUnit.Contains("公司"))
            {
                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
            }

            // Try progressively more specific money extractions until one yields a non-zero value.
            bidMoney = bidCtx.GetMoneyRegex(null, false, "万元");
            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
            {
                bidMoney = bidCtx.GetMoneyRegex(null, false, "万元", 100, ";");
            }
            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
            {
                bidMoney = bidCtx.GetMoneyRegex(new string[] { "投标总报价" }, false, "万元", 100, ";");
            }
            bidUnit = bidUnit.GetReplace("名称");
            try
            {
                // Values above 100000 are divided by 10000 (presumably yuan rather than 万元 — confirm).
                if (decimal.Parse(bidMoney) > 100000)
                {
                    bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                }
            }
            catch
            {
                // Non-numeric money text is kept as extracted.
            }

            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = "广州市白云区景泰街道办事处";
            }
            msgType = "广州市白云区景泰街道办事处";
            specType = "政府采购";
            bidType = prjName.GetInviteBidType();

            BidInfo info = ToolDb.GenBidInfo("广东省", "广州政府采购", "白云区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!string.IsNullOrEmpty(src))
            {
                // The URL comes from the crawled site, so single quotes are doubled before it is
                // embedded in the SQL text.
                string safeUrl = (Convert.ToString(info.InfoUrl) ?? string.Empty).Replace("'", "''");
                string sql = string.Format("select Id from BidInfo where InfoUrl='{0}'", safeUrl);
                object obj = ToolDb.ExecuteScalar(sql);
                if (obj == null || obj.ToString() == "")
                {
                    try
                    {
                        BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, "");
                        }
                    }
                    catch
                    {
                        // Best effort: a failed image download does not stop the crawl.
                    }
                }
            }

            // Queue link attachments on the base class list.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k].GetATag();
                    if (a.IsAtagAttach())
                    {
                        string link = string.Empty;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://jingtai.by.gov.cn/" + a.Link.GetReplace("./");
                        }
                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
        else
        {
            string code = string.Empty, buildUnit = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty,
                   inviteType = string.Empty, specType = string.Empty, endDate = string.Empty, remark = string.Empty,
                   msgType = string.Empty, otherType = string.Empty;

            // The plain text is derived before the image URL is rewritten in HtmlTxt.
            inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
            inviteType = prjName.GetInviteBidType();

            // Make the first image's src absolute inside the stored HTML.
            Parser imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
            NodeList imgNode = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
            string src = string.Empty;
            if (imgNode != null && imgNode.Count > 0)
            {
                string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                src = "http://jingtai.by.gov.cn/" + imgUrl;
                HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
            }

            code = inviteCtx.GetCodeRegex().GetCodeDel();
            buildUnit = inviteCtx.GetBuildRegex();
            prjAddress = inviteCtx.GetAddressRegex();
            if (buildUnit.Contains("地址"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
            }
            if (buildUnit.Contains("公司"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
            }
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = "广州市白云区景泰街道办事处";
            }
            msgType = "广州市白云区景泰街道办事处";
            specType = "政府采购";

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "白云区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
            list.Add(info);

            if (!string.IsNullOrEmpty(src))
            {
                // The URL comes from the crawled site, so single quotes are doubled before it is
                // embedded in the SQL text.
                string safeUrl = (Convert.ToString(info.InfoUrl) ?? string.Empty).Replace("'", "''");
                string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", safeUrl);
                object obj = ToolDb.ExecuteScalar(sql);
                if (obj == null || obj.ToString() == "")
                {
                    try
                    {
                        BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, "");
                        }
                    }
                    catch
                    {
                        // Best effort: a failed image download does not stop the crawl.
                    }
                }
            }

            // Queue link attachments on the base class list.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k].GetATag();
                    if (a.IsAtagAttach())
                    {
                        string link = string.Empty;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://jingtai.by.gov.cn/" + a.Link.GetReplace("./");
                        }
                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged notice grid ("MoreInfoList1_DataGrid1") at <c>SiteUrl</c>. Each row's detail
/// page is downloaded, stored as a <c>NotifyInfo</c> through <c>ToolDb.SaveEntity</c>, and then
/// either the attachment links of the content or — when the content has no links at all — its
/// images are stored as attachments.
/// </summary>
/// <param name="crawlAll">When false, the method returns once the processed count reaches <c>MaxCount</c>.</param>
/// <returns>
/// The (never populated) result list, or <c>null</c> when the <c>MaxCount</c> limit ends the crawl;
/// records are persisted as a side effect.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NotifyInfo>();
    int pageInt = 1, sqlCount = 0;
    string html = string.Empty;
    string viewState = string.Empty;
    string cookiestr = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
    }
    catch
    {
        return list;
    }

    // Read the total page count from the pager text; keep 1 when it cannot be parsed.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("总页数", "当前").Replace(":", "");
            pageInt = int.Parse(temp);
        }
        catch
        {
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Later pages are requested through the pager's postback.
            viewState = this.ToolWebSite.GetAspNetViewState(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" },
                new string[] { viewState, "MoreInfoList1$Pager", i.ToString() });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            string infoScorce = string.Empty;

            TableRow tr = table.Rows[j];
            // Rows without the title/date cells or without a link are skipped so that a
            // single malformed row cannot abort the whole crawl.
            if (tr.ColumnCount < 3)
            {
                continue;
            }
            ATag aTag = tr.Columns[1].GetATag();
            if (aTag == null)
            {
                continue;
            }

            string headName = aTag.GetAttribute("title");
            string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
            string infoUrl = "http://www.gaxqjyzx.com" + aTag.Link;

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string ctxHtml = dtlNode.AsHtml();
            string infoCtx = ctxHtml.ToCtxString();
            string msgType = "贵安新区公共资源交易中心";
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "贵州省", "贵州省及地市", "贵安新区", infoCtx, "通知公告");

            sqlCount++;
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                continue;
            }

            parser = new Parser(new Lexer(ctxHtml));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag fileATag = aNode[k].GetATag();
                    if (!fileATag.IsAtagAttach())
                    {
                        continue;
                    }

                    BaseAttach obj = null;
                    try
                    {
                        if (fileATag.Link.ToLower().Contains("http"))
                        {
                            obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                        }
                        else
                        {
                            obj = ToolHtml.GetBaseAttach("http://www.gaxqjyzx.com" + fileATag.Link, headName, info.Id);
                        }
                    }
                    catch
                    {
                        // Best effort: a failed attachment does not stop the crawl.
                    }
                    if (obj != null)
                    {
                        ToolDb.SaveEntity(obj, string.Empty);
                    }
                }
            }
            else
            {
                // No links in the content: fall back to storing its images.
                parser.Reset();
                NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                if (imgNode != null && imgNode.Count > 0)
                {
                    for (int k = 0; k < imgNode.Count; k++)
                    {
                        // Index with k so every image is stored; indexing with 0 saved the
                        // first image repeatedly and dropped the others.
                        ImageTag img = imgNode[k] as ImageTag;
                        if (img == null)
                        {
                            continue;
                        }

                        BaseAttach obj = null;
                        try
                        {
                            if (img.ImageURL.ToLower().Contains("http"))
                            {
                                obj = ToolHtml.GetBaseAttach(img.ImageURL, headName, info.Id);
                            }
                            else
                            {
                                obj = ToolHtml.GetBaseAttach("http://www.gaxqjyzx.com" + img.ImageURL, headName, info.Id);
                            }
                        }
                        catch
                        {
                            // Best effort: a failed image download does not stop the crawl.
                        }
                        if (obj != null)
                        {
                            ToolDb.SaveEntity(obj, string.Empty);
                        }
                    }
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged notice grid ("MyGridView1") at <c>SiteUrl</c>. Each row's detail page is
/// downloaded, its image URLs are rewritten to absolute "http://publish.bcactc.com" URLs, the notice
/// is stored as a <c>NotifyInfo</c> through <c>ToolDb.SaveEntity</c>, and the images plus the
/// attachment links of the content are stored as attachments.
/// </summary>
/// <param name="crawlAll">When false, the method returns once the processed count reaches <c>MaxCount</c>.</param>
/// <returns>
/// The (never populated) result list, or <c>null</c> when the <c>MaxCount</c> limit ends the crawl;
/// records are persisted as a side effect.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NotifyInfo>();
    int pageInt = 1, sqlCount = 0;
    string html = string.Empty;
    string viewState = string.Empty;
    string cookiestr = string.Empty;
    string eventValidation = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default, ref cookiestr);
    }
    catch
    {
        return list;
    }

    // Read the page count from the pager row text; keep 1 when it cannot be parsed.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "gridview_PagerRow")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode.AsString().GetRegexBegEnd("/", " ");
            pageInt = int.Parse(temp);
        }
        catch
        {
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // The same postback is sent for every later page, built from the previous page's
            // state (presumably a "next page" image-button click — confirm against the site).
            viewState = this.ToolWebSite.GetAspNetViewState(html);
            eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION", "keyTextBox", "PagerControl1:_ctl4", "PagerControl1:_ctl2.x", "PagerControl1:_ctl2.y" },
                new string[] { "", "", "", viewState, "7CE136E4", eventValidation, "", "", "3", "5" });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MyGridView1")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is not visited (presumably the header row).
        for (int j = 1; j < table.RowCount; j++)
        {
            string infoScorce = string.Empty;

            TableRow tr = table.Rows[j];
            // Rows without the title/date cells or without a link are skipped so that a
            // single malformed row cannot abort the whole crawl.
            if (tr.ColumnCount < 3)
            {
                continue;
            }
            ATag aTag = tr.Columns[1].GetATag();
            if (aTag == null)
            {
                continue;
            }

            string headName = aTag.LinkText;
            string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
            string infoUrl = aTag.Link;

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                Logger.Error(headName);
                Logger.Error(pageInt);
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "PopupBody_context")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                // Titles longer than 200 bytes in the default encoding are cut to 100 characters.
                if (Encoding.Default.GetByteCount(headName) > 200)
                {
                    headName = headName.Substring(0, 100);
                }

                string ctxHtml = dtlNode.AsHtml();
                string infoCtx = ctxHtml.ToCtxString();

                // Collect the images and make their URLs absolute inside the stored HTML.
                List<string> listImg = new List<string>();
                parser = new Parser(new Lexer(ctxHtml));
                NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                if (imgNode != null && imgNode.Count > 0)
                {
                    for (int m = 0; m < imgNode.Count; m++)
                    {
                        string link = "http://publish.bcactc.com" + (imgNode[m] as ImageTag).ImageURL;
                        listImg.Add(link);
                        ctxHtml = ctxHtml.GetReplace((imgNode[m] as ImageTag).ImageURL, link);
                    }
                }

                string msgType = "北京市建设工程发包承包交易中心";
                NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "北京市", "北京市区", "", infoCtx, "通知公告");
                sqlCount++;

                if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                {
                    for (int a = 0; a < listImg.Count; a++)
                    {
                        try
                        {
                            // Index with a so every image is stored; indexing with 0 saved the
                            // first image repeatedly and dropped the others.
                            BaseAttach entity = ToolHtml.GetBaseAttach(listImg[a], headName, info.Id);
                            if (entity != null)
                            {
                                ToolDb.SaveEntity(entity, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: a failed image download does not stop the crawl.
                        }
                    }

                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag fileATag = aNode[k].GetATag();
                            if (!fileATag.IsAtagAttach())
                            {
                                continue;
                            }

                            BaseAttach obj = null;
                            try
                            {
                                if (fileATag.Link.ToLower().Contains("http"))
                                {
                                    obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                                }
                                else
                                {
                                    obj = ToolHtml.GetBaseAttach("http://publish.bcactc.com/" + fileATag.Link, headName, info.Id);
                                }
                            }
                            catch
                            {
                                // Best effort: a failed attachment does not stop the crawl.
                            }
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                    }
                }

                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return null;
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Downloads the JSON tender listing at <c>SiteUrl + (MaxCount + 20)</c>. Every array-valued
/// property of the JSON object (the "total" property is ignored) is treated as a list of rows; for
/// each row the announcement detail is fetched, turned into an <c>InviteInfo</c>, saved through
/// <c>ToolDb.SaveEntity</c>, and links whose URL contains "download" are stored as attachments.
/// </summary>
/// <param name="crawlAll">When false, the method returns once <c>MaxCount</c> rows have been processed.</param>
/// <returns>
/// <c>null</c> when the listing cannot be downloaded or parsed, or when the <c>MaxCount</c> limit
/// ends the crawl; otherwise the (never populated) result list. Records are persisted as a side effect.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();
    int sqlCount = 0;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (MaxCount + 20));
    }
    catch
    {
        return null;
    }

    // Cut the JSON object out of the response. A response without braces used to make
    // Substring throw; it is now treated like a failed download.
    if (string.IsNullOrEmpty(html))
    {
        return null;
    }
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return null;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    Dictionary<string, object> smsTypeJson = null;
    try
    {
        JavaScriptSerializer serializer = new JavaScriptSerializer();
        smsTypeJson = serializer.DeserializeObject(html) as Dictionary<string, object>;
    }
    catch
    {
        return null;
    }
    if (smsTypeJson == null)
    {
        return null;
    }

    // Change dated 2013-11-19: when the announcement text contains one of these markers, only
    // the part before the marker is kept. Built once because it does not depend on the row.
    Dictionary<string, Regex> dicRegex = new Dictionary<string, Regex>();
    dicRegex.Add("重要提示", new Regex(@"([.\S\s]*)(?=重要提示)"));
    dicRegex.Add("温馨提示", new Regex(@"([.\S\s]*)(?=温馨提示)"));

    foreach (KeyValuePair<string, object> obj in smsTypeJson)
    {
        if (obj.Key == "total")
        {
            continue;
        }

        // Properties that are not arrays are ignored instead of failing the cast.
        object[] array = obj.Value as object[];
        if (array == null)
        {
            continue;
        }

        foreach (object arrValue in array)
        {
            Dictionary<string, object> dic = arrValue as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string buildUnit = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty,
                   endDate = string.Empty, remark = string.Empty, HtmlTxt = string.Empty;

            string code = Convert.ToString(dic["gcBH"]);
            string prjName = Convert.ToString(dic["gcName"]);
            string inviteType = Convert.ToString(dic["gcLeiXing2"]);
            string beginDate = Convert.ToString(dic["ggStartTime2"]);
            // e.g. https://www.szjsjy.com.cn:8001/jyw/showGongGao.do?ggGuid=03fb1287-935e-4e39-ab1a-35423a81928a&gcbh=&bdbhs=
            string addUrl = Convert.ToString(dic["detailUrl"]);
            string InfoUrl = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=1&id=" + Convert.ToString(dic["ggGuid"]);

            try
            {
                try
                {
                    HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                }
                catch
                {
                    // Fall through to the announcement endpoint below.
                }

                if (string.IsNullOrEmpty(HtmlTxt))
                {
                    HtmlTxt = this.ToolWebSite.GetHtmlByUrl("https://www.szjsjy.com.cn:8001/jyw/showGongGao.do?ggGuid=" + Convert.ToString(dic["ggGuid"])).GetJsString().GetReplace("\\t,\\r,\\n,\",{maoDian:,}");

                    // Flatten all tables into "label:value" lines: odd cells are labels, even cells values.
                    Parser dtlparser = new Parser(new Lexer(HtmlTxt));
                    NodeList dtlNode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        inviteCtx = string.Empty;
                        HtmlTxt = dtlNode.AsHtml();
                        for (int j = 0; j < dtlNode.Count; j++)
                        {
                            TableTag table = dtlNode[j] as TableTag;
                            for (int r = 0; r < table.RowCount; r++)
                            {
                                for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                {
                                    string temp = table.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:");
                                    if ((c + 1) % 2 == 0)
                                    {
                                        inviteCtx += temp + "\r\n";
                                    }
                                    else
                                    {
                                        inviteCtx += temp + ":";
                                    }
                                }
                            }
                        }
                    }
                }
            }
            catch
            {
                continue;
            }

            if (string.IsNullOrEmpty(inviteCtx))
            {
                inviteCtx = HtmlTxt.GetReplace("<br />,<br/>,</ br>,</br>", "\r\n").ToCtxString() + "\r\n";
            }

            // Try the extraction on the raw text, then without spaces, then with the spaced-out label.
            buildUnit = inviteCtx.GetBuildRegex();
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = inviteCtx.Replace(" ", "").GetBuildRegex();
            }
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = inviteCtx.GetRegex("建 设 单 位");
            }

            string specType = "建设工程";

            prjAddress = inviteCtx.GetAddressRegex();
            if (string.IsNullOrEmpty(prjAddress))
            {
                prjAddress = inviteCtx.Replace(" ", "").GetAddressRegex();
            }
            if (string.IsNullOrEmpty(prjAddress))
            {
                prjAddress = inviteCtx.GetRegex("工 程 地 址");
            }

            string msgType = "深圳市建设工程交易中心";

            foreach (string dicValue in dicRegex.Keys)
            {
                if (inviteCtx.Contains(dicValue))
                {
                    inviteCtx = dicRegex[dicValue].Match(inviteCtx).Value;
                }
            }

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳市工程", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, addUrl, HtmlTxt);
            sqlCount++;

            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                Parser parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag aTag = aNode[a].GetATag();
                        // A node that yields no anchor (or an anchor without a link) is skipped
                        // rather than raising a NullReferenceException outside the try below.
                        if (aTag == null || aTag.Link == null || !aTag.Link.Contains("download"))
                        {
                            continue;
                        }

                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttach(aTag.Link, aTag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                            }
                        }
                        catch
                        {
                            // Best effort: a failed attachment does not stop the crawl.
                        }
                    }
                }
            }

            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged list at <c>SiteUrl</c> (pager "dataPager"). Each row of the table with class
/// "p3" is turned into a <c>NotifyInfo</c> of type "政策法规", saved through
/// <c>ToolDb.SaveEntity</c>, and the attachment links found in the detail page's "Table1" are stored.
/// </summary>
/// <param name="crawlAll">When false, the method returns once the processed count reaches <c>MaxCount</c>.</param>
/// <returns>Always <c>null</c>; records are persisted as a side effect.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    int pageCount = 1, processed = 0;
    string listHtml = string.Empty;
    string viewState = string.Empty;

    try
    {
        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch (Exception)
    {
        return null;
    }

    // Read the page count from the pager text.
    Parser parser = new Parser(new Lexer(listHtml));
    NodeList pagerNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dataPager")));
    if (pagerNodes != null && pagerNodes.Count > 0)
    {
        try
        {
            string pageText = pagerNodes.AsString().GetRegexBegEnd("共有:", "页");
            pageCount = Convert.ToInt32(pageText);
        }
        catch
        {
            pageCount = 1;
        }
    }

    for (int page = 1; page <= pageCount; page++)
    {
        if (page > 1)
        {
            // Later pages are requested with a postback built from the previous page's view state.
            try
            {
                viewState = this.ToolWebSite.GetAspNetViewState(listHtml);
                string[] fieldNames = new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "searcher:txtKeyWord", "searcher:tcInputDateTime:txtDateTime1", "searcher:tcInputDateTime:txtDateTime2", "searcher:ddlProvince", "searcher:ddlCity1", "searcher:ddlCity2" };
                string[] fieldValues = new string[] { "dataPager", page.ToString(), viewState, "", "", "", "-1", "-1", "-1" };
                NameValueCollection postData = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, postData, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(listHtml));
        NodeList listTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "p3")));
        if (listTables == null || listTables.Count <= 0)
        {
            continue;
        }

        // Use the second "p3" table when there is one, otherwise the first.
        TableTag listTable = (listTables.Count > 1 ? listTables[1] : listTables[0]) as TableTag;

        for (int rowIndex = 0; rowIndex < listTable.RowCount; rowIndex++)
        {
            TableRow row = listTable.Rows[rowIndex];

            string infoScorce = string.Empty;
            string infoType = "政策法规";
            string headName = row.Columns[0].ToNodePlainString();
            string releaseTime = row.Columns[1].ToPlainTextString().GetDateRegex();
            // The detail path is the argument of the row's onclick call, with its quotes removed.
            string infoUrl = "http://www.sgjsj.gov.cn/sgwebims/" + row.Columns[0].GetATagValue("onclick").Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk").Replace("\"", "");

            string detailHtml = string.Empty;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList contentTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table4")));
            if (contentTables == null || contentTables.Count <= 0)
            {
                continue;
            }

            string ctxHtml = contentTables.AsHtml();
            string infoCtx = ctxHtml.ToCtxString();
            string msgType = MsgTypeCosnt.ShaoGuanMsgType;
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "韶关市区", string.Empty, infoCtx, infoType);

            processed++;
            if (!crawlAll && processed >= this.MaxCount)
            {
                return null;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                continue;
            }

            // Attachment links live in "Table1": the second match when present, otherwise all matches.
            parser = new Parser(new Lexer(detailHtml));
            NodeList attachTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table1")));
            NodeList anchors = null;
            if (attachTables != null && attachTables.Count > 1)
            {
                parser = new Parser(new Lexer(attachTables[1].ToHtml()));
                anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            }
            else if (attachTables != null && attachTables.Count > 0)
            {
                parser = new Parser(new Lexer(attachTables.AsHtml()));
                anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            }

            if (anchors == null || anchors.Count <= 0)
            {
                continue;
            }

            for (int k = 0; k < anchors.Count; k++)
            {
                ATag anchor = anchors[k] as ATag;
                if (!anchor.IsAtagAttach())
                {
                    continue;
                }

                try
                {
                    BaseAttach attach = ToolHtml.GetBaseAttach("http://www.sgjsj.gov.cn/sgwebims/" + anchor.Link.Replace("../", "").Replace("./", ""), anchor.LinkText, info.Id);
                    if (attach != null)
                    {
                        ToolDb.SaveEntity(attach, string.Empty);
                    }
                }
                catch
                {
                    // Best effort: a failed attachment does not stop the crawl.
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Crawls the paged notice listing at <c>SiteUrl</c> (later pages are requested with
/// "?page=N"), saves each notice as a <c>NotifyInfo</c> and downloads attachments
/// linked from the notice body.
/// </summary>
/// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> notices have been generated.</param>
/// <returns>The result list created at the start of the method; nothing is added to it here, records are persisted through <c>ToolDb</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NotifyInfo>();
    int pageInt = 1, sqlCount = 0;

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    // Page count is the number between "/" and "页" in the first "style1" table.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "style1")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("/", "页");
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Keep the single-page default when the pager text cannot be parsed.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?page=" + i, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "99%")), true),
                new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%"))));
        if (listNode == null || listNode.Count <= 0)
        {
            continue;
        }

        for (int j = 0; j < listNode.Count; j++)
        {
            TableTag entryTable = listNode[j] as TableTag;
            if (entryTable == null || entryTable.RowCount < 1)
            {
                continue;
            }

            TableRow tr = entryTable.Rows[0];
            ATag aTag = tr.GetATag();
            if (aTag == null)
            {
                // Row without a link: nothing to crawl.
                continue;
            }

            string infoScorce = string.Empty;
            string releaseTime = string.Empty;
            string ctxHtml = string.Empty;

            string headName = aTag.LinkText;
            // Overlong titles (more than 200 bytes in the default encoding) are cut to 99 characters.
            if (Encoding.Default.GetByteCount(headName) > 200 && headName.Length > 99)
            {
                headName = headName.Substring(0, 99);
            }

            string infoUrl = "http://www.hnsztb.com.cn/gsgg/" + aTag.Link;
            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "800")));
            if (dtlNode == null || dtlNode.Count <= 0)
            {
                continue;
            }

            TableTag table = dtlNode[0] as TableTag;
            if (table == null)
            {
                continue;
            }

            // Use the second row as content when present, otherwise the whole table.
            ctxHtml = table.RowCount > 1 ? table.Rows[1].ToHtml() : table.ToHtml();
            string infoCtx = ctxHtml.ToCtxString();

            // Try the date formats in turn until one yields a value.
            releaseTime = infoCtx.GetDateRegex();
            if (string.IsNullOrEmpty(releaseTime))
            {
                releaseTime = infoCtx.GetDateRegex("yyyy年MM月dd日");
            }
            if (string.IsNullOrEmpty(releaseTime))
            {
                releaseTime = infoCtx.GetDateRegex("yyyy/MM/dd");
            }
            if (string.IsNullOrEmpty(releaseTime))
            {
                releaseTime = infoCtx.GetChinaTime();
            }

            string msgType = "河南省建设工程招标投标协会";
            string infoType = "通知公告";
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl,
                ctxHtml, "河南省", "河南省及地市", string.Empty, infoCtx, infoType);

            sqlCount++;
            // The cap applies to partial crawls only. The original tested "crawlAll" here,
            // which capped full crawls and left partial crawls unbounded.
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return list;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                continue;
            }

            parser = new Parser(new Lexer(ctxHtml));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode == null || aNode.Count <= 0)
            {
                continue;
            }

            for (int k = 0; k < aNode.Count; k++)
            {
                ATag a = aNode[k].GetATag();
                if (a == null || !a.IsAtagAttach())
                {
                    continue;
                }

                // Absolute links are used as they are; relative ones are resolved against the site root.
                // The original took the absolute link from the listing anchor (aTag), not from
                // the attachment anchor itself.
                string link = a.Link.ToLower().Contains("http")
                    ? a.Link
                    : "http://www.hnsztb.com.cn/" + a.Link;

                try
                {
                    BaseAttach entity = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id);
                    if (entity == null)
                    {
                        entity = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                    }
                    if (entity != null)
                    {
                        ToolDb.SaveEntity(entity, string.Empty);
                    }
                }
                catch
                {
                    // Best effort: a failed attachment download does not stop the crawl.
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls bid-result entries from the JSON listing at <c>SiteUrl + MaxCount</c>.
/// For each entry the legacy HTML detail page is tried first; when it is empty the JSON
/// detail service is queried instead and its values are merged into the HTML taken from
/// the entry's "detailUrl" page. Each entry is saved as a <c>BidInfo</c> together with
/// its attachments.
/// </summary>
/// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> entries have been processed.</param>
/// <returns>
/// <c>null</c> when the listing cannot be fetched or parsed, or when the limit is hit;
/// otherwise the result list (nothing is added to it here, records are persisted through <c>ToolDb</c>).
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    int sqlCount = 0;
    IList list = new List<BidInfo>();

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
    }
    catch
    {
        return null;
    }

    // The payload is the text from the first "{" to the last "}".
    // A response without such an envelope is treated like a failed fetch.
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return null;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> smsTypeJson;
    try
    {
        smsTypeJson = serializer.DeserializeObject(html) as Dictionary<string, object>;
    }
    catch
    {
        return null;
    }
    if (smsTypeJson == null)
    {
        return null;
    }

    foreach (KeyValuePair<string, object> obj in smsTypeJson)
    {
        if (obj.Key == "total")
        {
            continue;
        }

        object[] array = obj.Value as object[];
        if (array == null)
        {
            // Only array-valued members hold entries.
            continue;
        }

        foreach (object arrValue in array)
        {
            Dictionary<string, object> dic = arrValue as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   endDate = string.Empty, prjMgr = string.Empty, otherType = string.Empty,
                   HtmlTxt = string.Empty;

            string code = Convert.ToString(dic["bdBH"]);
            string prjName = Convert.ToString(dic["bdName"]);
            string beginDate = Convert.ToString(dic["fabuTime2"]);
            string saveUrl = Convert.ToString(dic["detailUrl"]);
            string InfoUrl = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryOldOTDataDetail.do?type=4&id=" + dic["bdBH"];

            List<Dictionary<string, object>> listAttachs = new List<Dictionary<string, object>>();
            bool isJson = false;
            try
            {
                HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                if (string.IsNullOrEmpty(HtmlTxt))
                {
                    // No legacy HTML detail: fall back to the JSON detail service.
                    isJson = true;
                    string url = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryZbgs.do?guid=" + dic["dbZhongBiaoJieGuoGuid"] + "&ggGuid=&bdGuid=";
                    string htmldtl = this.ToolWebSite.GetHtmlByUrl(url);
                    Dictionary<string, object> dtlJsons = (Dictionary<string, object>)serializer.DeserializeObject(htmldtl);

                    buildUnit = Convert.ToString(dtlJsons["zbrAndLht"]);
                    bidUnit = Convert.ToString(dtlJsons["tbrName"]);
                    bidMoney = Convert.ToString(dtlJsons["zhongBiaoJE"]);
                    try
                    {
                        // NOTE(review): the quotient is later labelled "万元"; the source unit
                        // of zhongBiaoJE is not visible here - confirm the divisor.
                        bidMoney = (decimal.Parse(bidMoney) / 1000000).ToString();
                    }
                    catch
                    {
                        // Keep the raw value when it is not numeric.
                    }
                    prjMgr = Convert.ToString(dtlJsons["xiangMuJiLi"]);

                    // Nested objects are optional; missing ones stay null and the
                    // dependent replacements below are skipped by their own try/catch.
                    Dictionary<string, object> bd = null;
                    Dictionary<string, object> gc = null;
                    Dictionary<string, object> xm = null;
                    try { bd = dtlJsons["bd"] as Dictionary<string, object>; } catch { }
                    try { gc = bd["gc"] as Dictionary<string, object>; } catch { }
                    try { xm = bd["xm"] as Dictionary<string, object>; } catch { }

                    string htl = this.ToolWebSite.GetHtmlByUrl(saveUrl);
                    Parser templateParser = new Parser(new Lexer(htl));
                    NodeList nodelist = templateParser.ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "right_bg")));
                    if (nodelist != null && nodelist.Count > 0)
                    {
                        // Fill the empty <span> placeholders with values from the JSON detail.
                        // Each replacement is independent: a missing key skips only that field.
                        HtmlTxt = nodelist.AsHtml();
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcBH\"></span>", "<span id=\"gcBH\">" + code + "</span>"); } catch { }
                        // The original wrote id="gcBH" here, duplicating the id of the span above.
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcName\"></span>", "<span id=\"gcName\">" + gc["gcName"] + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"bdName\"></span>", "<span id=\"bdName\">" + prjName + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmBH\"></span>", "<span id=\"xmBH\">" + xm["xm_BH"] + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmName\"></span>", "<span id=\"xmName\">" + xm["xm_Name"] + "</span>"); } catch { }
                        try
                        {
                            long zbgsStartTime = Convert.ToInt64(dtlJsons["zbgsStartTime"]);
                            HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbgsStartTime\"></span>", "<span id=\"zbgsStartTime\">" + ToolHtml.GetDateTimeByLong(zbgsStartTime) + "</span>");
                        }
                        catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbRName\"></span>", "<span id=\"zbRName\">" + gc["zbRName"] + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbdlJG\"></span>", "<span id=\"zbdlJG\">" + gc["creatorName"] + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"tbrName\"></span>", "<span id=\"tbrName\">" + dtlJsons["tbrName"] + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoJE\"></span>", "<span id=\"zhongBiaoJE\">" + bidMoney + "万元</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoGQ\"></span>", "<span id=\"zhongBiaoGQ\">" + dtlJsons["zhongBiaoGQ"] + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"xiangMuJiLi\"></span>", "<span id=\"xiangMuJiLi\">" + prjMgr + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeDengJi\"></span>", "<span id=\"ziGeDengJi\">" + dtlJsons["ziGeDengJi"] + "</span>"); } catch { }
                        try { HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeZhengShu\"></span>", "<span id=\"ziGeZhengShu\">" + dtlJsons["ziGeZhengShu"] + "</span>"); } catch { }
                        try
                        {
                            string zanding = string.IsNullOrWhiteSpace(Convert.ToString(dtlJsons["isZanDingJinE"])) ? "否" : "是";
                            HtmlTxt = HtmlTxt.GetReplace("<span id=\"isZanDingJinE\"></span>", "<span id=\"isZanDingJinE\">" + zanding + "</span>");
                        }
                        catch { }
                    }

                    // Collect the attachment descriptors of the entry's file group.
                    try
                    {
                        string fileUrl = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + dtlJsons["ztbFileGroupGuid"];
                        string fileJson = this.ToolWebSite.GetHtmlByUrl(fileUrl);
                        Dictionary<string, object> fileDic = (Dictionary<string, object>)serializer.DeserializeObject(fileJson);
                        object[] objFile = fileDic["rows"] as object[];
                        foreach (object file in objFile)
                        {
                            Dictionary<string, object> attach = file as Dictionary<string, object>;
                            if (attach != null)
                            {
                                listAttachs.Add(attach);
                            }
                        }
                    }
                    catch
                    {
                        // Attachments are optional.
                    }
                }
            }
            catch
            {
                continue;
            }

            string bidCtx = HtmlTxt.Replace("<br />", "\r\n").ToCtxString();
            if (!isJson)
            {
                // HTML detail: derive the fields from the plain text.
                buildUnit = bidCtx.GetBuildRegex();
                bidUnit = bidCtx.GetBidRegex();
                bidMoney = bidCtx.GetMoneyRegex();
                prjMgr = bidCtx.GetMgrRegex();
                if (string.IsNullOrEmpty(prjMgr))
                {
                    prjMgr = bidCtx.GetRegex("项目负责");
                }
            }

            string msgType = "深圳市建设工程交易中心龙岗分中心";
            string specType = "建设工程";
            string bidType = ToolHtml.GetInviteTypes(prjName);
            prjName = ToolDb.GetPrjName(prjName);
            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, code, prjName,
                buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType,
                specType, otherType, bidMoney, saveUrl, prjMgr, HtmlTxt);

            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
            sqlCount++;

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
            {
                continue;
            }

            if (!isJson)
            {
                // HTML detail: attachments are the anchors inside the stored HTML.
                Parser attachParser = new Parser(new Lexer(HtmlTxt));
                NodeList fileNode = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (fileNode == null || fileNode.Count <= 0)
                {
                    continue;
                }

                for (int f = 0; f < fileNode.Count; f++)
                {
                    ATag tag = fileNode[f] as ATag;
                    if (tag == null)
                    {
                        continue;
                    }
                    if (!(tag.IsAtagAttach() || tag.Link.ToLower().Contains("downloadfile")))
                    {
                        continue;
                    }

                    try
                    {
                        string link;
                        if (tag.Link.ToLower().Contains("http"))
                        {
                            // Strip a trailing "//" and any backslashes from absolute links.
                            link = tag.Link;
                            if (link.EndsWith("//"))
                            {
                                link = link.Remove(link.LastIndexOf("//"));
                            }
                            link = link.GetReplace("\\", "");
                        }
                        else
                        {
                            link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                        }

                        BaseAttach attach = ToolHtml.GetBaseAttachByUrl(link, tag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                        }
                    }
                    catch
                    {
                        // Best effort: skip attachments that cannot be downloaded.
                    }
                }
            }
            else if (listAttachs.Count > 0)
            {
                // JSON detail: attachments come from the file-group service.
                foreach (Dictionary<string, object> attach in listAttachs)
                {
                    try
                    {
                        string attachName = Convert.ToString(attach["attachName"]);
                        string attachId = Convert.ToString(attach["attachGuid"]);
                        string link = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachId;
                        BaseAttach attachBase = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                        if (attachBase != null)
                        {
                            ToolDb.SaveEntity(attachBase, "SourceID,AttachServerPath");
                        }
                    }
                    catch
                    {
                        // Best effort: skip attachments that cannot be downloaded.
                    }
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the notice listings of several Shenzhen district procurement sites
/// (*.szzfcg.cn). For every listed notice the detail page is fetched and stored as a
/// <c>NotifyInfo</c>; when the detail page has no centred table, the whole body is
/// stored and the page's images are saved as attachments.
/// </summary>
/// <param name="crawlAll">
/// When false, crawling of a district stops once <c>MaxCount</c> notices have been
/// generated for it; the counter is reset per district and the next district is then processed.
/// </param>
/// <returns>The result list created at the start of the method; nothing is added to it here, records are persisted through <c>ToolDb</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NotifyInfo>();

    // District name -> listing URL.
    // Yantian (yt.szzfcg.cn, id=50074439) was commented out in the original and stays disabled.
    Dictionary<string, string> dic = new Dictionary<string, string>();
    dic.Add("龙华新区", "http://lhxq.szzfcg.cn/portal/topicView.do?method=view&id=110074439");
    dic.Add("大鹏新区", "http://dp.szzfcg.cn/portal/topicView.do?method=view&id=100074439");
    dic.Add("坪山新区", "http://ps.szzfcg.cn/portal/topicView.do?method=view&id=90074439");
    dic.Add("龙岗区", "http://lg.szzfcg.cn/portal/topicView.do?method=view&id=70074439");
    dic.Add("光明新区", "http://gm.szzfcg.cn/portal/topicView.do?method=view&id=10170626");
    dic.Add("福田区", "http://ft.szzfcg.cn/portal/topicView.do?method=view&id=30074439");
    dic.Add("罗湖区", "http://lh.szzfcg.cn/portal/topicView.do?method=view&id=20074439");
    dic.Add("南山区", "http://ns.szzfcg.cn/portal/topicView.do?method=view&id=40074439");

    // District name -> host prefix used to build absolute URLs.
    Dictionary<string, string> dicCity = new Dictionary<string, string>();
    dicCity.Add("龙华新区", "lhxq");
    dicCity.Add("大鹏新区", "dp");
    dicCity.Add("坪山新区", "ps");
    dicCity.Add("龙岗区", "lg");
    dicCity.Add("光明新区", "gm");
    dicCity.Add("福田区", "ft");
    dicCity.Add("罗湖区", "lh");
    dicCity.Add("南山区", "ns");

    foreach (string key in dic.Keys)
    {
        string listUrl = dic[key];
        string siteRoot = "http://" + dicCity[key] + ".szzfcg.cn";
        int pageInt = 1, sqlCount = 0;
        bool limitReached = false;

        string html;
        try
        {
            html = this.ToolWebSite.GetHtmlByUrl(listUrl);
        }
        catch
        {
            continue;
        }

        // The page count is the value of the last option of the page selector.
        Parser parser = new Parser(new Lexer(html));
        NodeList pageNode = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "__ec_pages")));
        if (pageNode != null && pageNode.Count > 0)
        {
            SelectTag select = pageNode[0] as SelectTag;
            try
            {
                pageInt = int.Parse(select.OptionTags[select.OptionTags.Length - 1].Value);
            }
            catch
            {
                // Keep the single-page default.
            }
        }

        for (int i = 1; i <= pageInt && !limitReached; i++)
        {
            if (i > 1)
            {
                // A failing page request skips that page instead of aborting the whole crawl,
                // in line with the handling of the first page and of detail pages.
                try
                {
                    // The topic id is everything after "id=" in the listing URL.
                    string id = listUrl.Substring(listUrl.IndexOf("id")).Replace("id=", "");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[]
                        {
                            "ec_i", "topicChrList_20070702_crd", "topicChrList_20070702_f_a",
                            "topicChrList_20070702_p", "topicChrList_20070702_s_name",
                            "topicChrList_20070702_s_topName", "id", "method", "__ec_pages",
                            "topicChrList_20070702_rd", "topicChrList_20070702_f_name",
                            "topicChrList_20070702_f_topName", "topicChrList_20070702_f_ldate"
                        },
                        new string[]
                        {
                            "topicChrList_20070702", "20", "", i.ToString(), "", "", id, "view",
                            i.ToString(), "20", "", "", ""
                        });
                    html = this.ToolWebSite.GetHtmlByUrl(listUrl, nvc);
                }
                catch
                {
                    continue;
                }
            }

            parser = new Parser(new Lexer(html));
            NodeList listNode = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "topicChrList_20070702_table")));
            // Short-circuit "||" here: the original's non-short-circuit "&" dereferenced a null list.
            if (listNode == null || listNode.Count <= 0)
            {
                continue;
            }

            TableTag table = listNode[0] as TableTag;
            if (table == null)
            {
                continue;
            }

            // Iteration starts at row index 3, as in the original.
            for (int j = 3; j < table.RowCount; j++)
            {
                string infoScorce = string.Empty;
                TableRow tr = table.Rows[j];
                string headName = tr.Columns[1].ToNodePlainString();
                string releaseTime = tr.Columns[3].ToPlainTextString();
                string infoType = "通知公告";
                string msgType = "深圳市" + key + "政府采购中心";
                string infoUrl = siteRoot + tr.Columns[1].GetATagHref();

                string htmldtl;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                }
                catch
                {
                    continue;
                }

                // Prefer the first centred table; otherwise fall back to the whole body.
                parser = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                string ctxHtml;
                bool fromBody = false;
                if (dtlNode != null && dtlNode.Count > 0)
                {
                    ctxHtml = dtlNode[0].ToHtml();
                }
                else
                {
                    parser.Reset();
                    NodeList bodyNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    if (bodyNode == null || bodyNode.Count <= 0)
                    {
                        continue;
                    }
                    ctxHtml = bodyNode.AsHtml();
                    fromBody = true;
                }

                string infoCtx = ctxHtml.ToCtxString();
                NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl,
                    ctxHtml, "广东省", "深圳政府采购", key, infoCtx, infoType);

                sqlCount++;
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    // Per-district cap: stop this district and move on to the next one.
                    // The body fallback used to return null here and abort all remaining districts.
                    limitReached = true;
                    break;
                }

                bool saved = ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate);
                if (!saved || !fromBody)
                {
                    // Images are only collected for notices stored from the body fallback.
                    continue;
                }

                parser.Reset();
                NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                if (imgList == null || imgList.Count <= 0)
                {
                    continue;
                }

                for (int m = 0; m < imgList.Count; m++)
                {
                    try
                    {
                        ImageTag img = imgList[m] as ImageTag;
                        string src = img.GetAttribute("src");
                        string imgUrl = src.Contains("http") ? src : siteRoot + src;
                        BaseAttach obj = ToolHtml.GetBaseAttach(imgUrl, headName, info.Id);
                        if (obj != null)
                        {
                            ToolDb.SaveEntity(obj, string.Empty);
                        }
                    }
                    catch
                    {
                        // Best effort: a failed image download does not stop the crawl.
                    }
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged notice listing at <c>SiteUrl</c>, stores each notice whose detail page
/// contains a div with id "newsinfo" as a <c>NotifyInfo</c>, and saves the non-GIF images
/// and attachment links found in the notice content.
/// </summary>
/// <param name="crawlAll">When false, the crawl stops as soon as <c>MaxCount</c> notices have been generated.</param>
/// <returns>Always <c>null</c>; records are persisted through <c>ToolDb</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string siteRoot = "http://www.szpark.com.cn";
    int totalPages = 1;
    int generated = 0;

    string listHtml;
    try
    {
        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return null;
    }

    // Determine the page count from the pager link text; fall back to a single page.
    NodeList pagerLinks = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "PageDataList__ctl7_LinkButton1")));
    if (pagerLinks != null && pagerLinks.Count > 0)
    {
        try
        {
            string pagerText = pagerLinks.AsString();
            totalPages = Convert.ToInt32(pagerText.GetRegexBegEnd("共", "页"));
        }
        catch
        {
            totalPages = 1;
        }
    }

    for (int page = 1; page <= totalPages; page++)
    {
        if (page > 1)
        {
            // Later pages are requested by posting back the pager link for that page,
            // with the view state of the most recently fetched page.
            try
            {
                string state = this.ToolWebSite.GetAspNetViewState(listHtml);
                string[] fieldNames =
                {
                    "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "head1:username",
                    "head1:Password", "head1:rbLoginType", "Tb_keyword", "ddlNewsType",
                    "ddlistaddnewsdate"
                };
                string[] fieldValues =
                {
                    "PageDataList$_ctl" + (page + 1).ToString() + "$LinkButton1", "", state,
                    "", "", "unit", "", "20", ""
                };
                NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        NodeList tables = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", " tb_list")));
        if (tables == null || tables.Count <= 0)
        {
            continue;
        }

        TableTag grid = tables[0] as TableTag;
        for (int rowIndex = 0; rowIndex < grid.RowCount; rowIndex++)
        {
            TableRow row = grid.Rows[rowIndex];
            string category = "通知公告";
            string source = string.Empty;
            string published = row.Columns[2].ToPlainTextString().GetDateRegex();
            string title = row.Columns[1].ToNodePlainString();
            string detailUrl = siteRoot + row.Columns[1].GetATagHref();

            // A failed detail fetch leaves the HTML empty, so no content node is found below.
            string detailHtml = string.Empty;
            try
            {
                detailHtml = ToolHtml.GetHtmlByUrl(detailUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
            }

            NodeList content = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "newsinfo")));
            if (content == null || content.Count <= 0)
            {
                continue;
            }

            string contentHtml = content.AsHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");

            // Plain-text form: strip spaces, collapse tabs, drop leftover tags and blank lines.
            string contentText = contentHtml.ToCtxString().Replace(" ", "").Replace(" ", "").Replace("\t\t", "\t").Replace("\t\t", "\t");
            contentText = Regex.Replace(contentText, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace(" ", "").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

            string messageType = MsgTypeCosnt.ShenZhenFJYLMsgType;
            source = source.Replace(" ", "");
            NotifyInfo info = ToolDb.GenNotifyInfo(title, published, source, messageType, detailUrl,
                contentHtml, "广东省", "深圳市工程", string.Empty, contentText, category);

            generated++;
            if (!crawlAll && generated >= this.MaxCount)
            {
                return null;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                continue;
            }

            // Images embedded in the content (GIFs are skipped).
            NodeList images = new Parser(new Lexer(contentHtml)).ExtractAllNodesThatMatch(new TagNameFilter("img"));
            if (images != null && images.Count > 0)
            {
                for (int m = 0; m < images.Count; m++)
                {
                    try
                    {
                        ImageTag image = images[m] as ImageTag;
                        string src = image.GetAttribute("src");
                        if (src.ToLower().Contains(".gif"))
                        {
                            continue;
                        }

                        string imageUrl = src.Contains("http")
                            ? src
                            : siteRoot + src.Replace("../", "/").Replace("./", "/");
                        BaseAttach file = ToolHtml.GetBaseAttach(imageUrl, title, info.Id);
                        if (file != null)
                        {
                            ToolDb.SaveEntity(file, string.Empty);
                        }
                    }
                    catch
                    {
                    }
                }
            }

            // Attachment links in the content.
            NodeList anchors = new Parser(new Lexer(contentHtml)).ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors == null || anchors.Count <= 0)
            {
                continue;
            }

            for (int k = 0; k < anchors.Count; k++)
            {
                ATag anchor = anchors[k] as ATag;
                if (!anchor.IsAtagAttach())
                {
                    continue;
                }

                try
                {
                    string href = anchor.GetATagHref();
                    string fileUrl = href.Contains("http")
                        ? href
                        : siteRoot + href.Replace("../", "/").Replace("./", "/");
                    BaseAttach file = ToolHtml.GetBaseAttach(fileUrl, anchor.LinkText, info.Id);
                    if (file != null)
                    {
                        ToolDb.SaveEntity(file, string.Empty);
                    }
                }
                catch
                {
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Crawls the paged article listing at <c>SiteUrl</c>, stores each article whose detail page
/// contains the span "dnn_ctr391_ArticleShow_lblContent" as a <c>NotifyInfo</c>, and saves
/// the non-GIF images and attachment links found in the article content.
/// </summary>
/// <param name="crawlAll">When false, the crawl stops as soon as <c>MaxCount</c> articles have been generated.</param>
/// <returns>Always <c>null</c>; records are persisted through <c>ToolDb</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string siteRoot = "http://www.szmea.net";
    int totalPages = 1;
    int generated = 0;

    string listHtml;
    try
    {
        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch (Exception)
    {
        return null;
    }

    // The page count equals the number of options in the page selector.
    NodeList pageOptions = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
        new AndFilter(
            new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "dnn_ctr467_ArticleList_cboPages")), true),
            new TagNameFilter("option")));
    if (pageOptions != null && pageOptions.Count > 0)
    {
        totalPages = pageOptions.Count;
    }

    for (int page = 1; page <= totalPages; page++)
    {
        if (page > 1)
        {
            // Later pages are requested by posting the page selector value (page - 1)
            // together with the view state of the most recently fetched page.
            try
            {
                string state = this.ToolWebSite.GetAspNetViewState(listHtml);
                string[] fieldNames =
                {
                    "__EVENTARGUMENT", "dnn:ctr467:ArticleList:cboPages", "ScrollTop",
                    "__dnnVariable", "__VIEWSTATE"
                };
                string[] fieldValues = { "", (page - 1).ToString(), "", "", state };
                NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        NodeList tables = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dnn_ctr467_ArticleList_PanelA")), true),
                new TagNameFilter("table")));
        if (tables == null || tables.Count <= 0)
        {
            continue;
        }

        TableTag grid = tables[0] as TableTag;
        for (int rowIndex = 0; rowIndex < grid.RowCount; rowIndex++)
        {
            TableRow row = grid.Rows[rowIndex];
            string category = "通知公告";
            string source = string.Empty;
            // The listing shows two-digit years; the century prefix is added here.
            string published = "20" + row.Columns[2].ToPlainTextString().GetDateRegex("yy-MM-dd");
            string title = row.Columns[1].ToNodePlainString();
            string detailUrl = siteRoot + row.Columns[1].GetATagHref();

            // A failed detail fetch leaves the HTML empty, so no content node is found below.
            string detailHtml = string.Empty;
            try
            {
                detailHtml = ToolHtml.GetHtmlByUrl(SiteUrl, detailUrl, Encoding.Default).GetJsString();
            }
            catch
            {
            }

            NodeList content = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "dnn_ctr391_ArticleShow_lblContent")));
            if (content == null || content.Count <= 0)
            {
                continue;
            }

            string contentHtml = content.AsHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");

            // Plain-text form: strip spaces, collapse tabs, drop leftover tags and blank lines.
            string contentText = contentHtml.ToCtxString().Replace(" ", "").Replace(" ", "").Replace("\t\t", "\t").Replace("\t\t", "\t");
            contentText = Regex.Replace(contentText, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace(" ", "").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

            string messageType = MsgTypeCosnt.ShenZhenJLGCMsgType;
            source = source.Replace(" ", "");
            NotifyInfo info = ToolDb.GenNotifyInfo(title, published, source, messageType, detailUrl,
                contentHtml, "广东省", "深圳市工程", string.Empty, contentText, category);

            generated++;
            if (!crawlAll && generated >= this.MaxCount)
            {
                return null;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                continue;
            }

            // Images embedded in the content (GIFs are skipped).
            NodeList images = new Parser(new Lexer(contentHtml)).ExtractAllNodesThatMatch(new TagNameFilter("img"));
            if (images != null && images.Count > 0)
            {
                for (int m = 0; m < images.Count; m++)
                {
                    try
                    {
                        ImageTag image = images[m] as ImageTag;
                        string src = image.GetAttribute("src");
                        if (src.ToLower().Contains(".gif"))
                        {
                            continue;
                        }

                        string imageUrl = src.Contains("http")
                            ? src
                            : siteRoot + src.Replace("../", "/").Replace("./", "/");
                        BaseAttach file = ToolHtml.GetBaseAttach(imageUrl, title, info.Id);
                        if (file != null)
                        {
                            ToolDb.SaveEntity(file, string.Empty);
                        }
                    }
                    catch
                    {
                    }
                }
            }

            // Attachment links in the content.
            NodeList anchors = new Parser(new Lexer(contentHtml)).ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors == null || anchors.Count <= 0)
            {
                continue;
            }

            for (int k = 0; k < anchors.Count; k++)
            {
                ATag anchor = anchors[k] as ATag;
                if (!anchor.IsAtagAttach())
                {
                    continue;
                }

                try
                {
                    string href = anchor.GetATagHref();
                    string fileUrl = href.Contains("http")
                        ? href
                        : siteRoot + href.Replace("../", "/").Replace("./", "/");
                    BaseAttach file = ToolHtml.GetBaseAttach(fileUrl, anchor.LinkText, info.Id);
                    if (file != null)
                    {
                        ToolDb.SaveEntity(file, string.Empty);
                    }
                }
                catch
                {
                    continue;
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Posts the login form on the szjsjy.com.cn home page, then crawls the paged grid at
/// <c>SiteUrl</c>. Each grid row's detail page is stored as a <c>BidSituation</c>
/// and the attachment links inside the detail content are downloaded.
/// </summary>
/// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> records have been generated.</param>
/// <returns>The result list created at the start of the method; nothing is added to it here, records are persisted through <c>ToolDb</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidSituation>();
    string html = string.Empty;
    string cookiestr = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    int pageInt = 1, sqlCount = 0;

    try
    {
        // Load the home page to obtain the form state, post the login form, and reuse the
        // cookies returned by that post for every later request.
        html = this.ToolWebSite.GetHtmlByUrl("http://www.szjsjy.com.cn/HomePage.aspx", Encoding.UTF8, ref cookiestr);
        viewState = this.ToolWebSite.GetAspNetViewState(html);
        eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
        NameValueCollection n = this.ToolWebSite.GetNameValueCollection(
            new string[]
            {
                "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "TextBox1", "ddl",
                "DDL_Govt", "DDL_Trade", "txtText", "hdnSN", "ImageButton2.x", "ImageButton2.y"
            },
            new string[]
            {
                viewState, "", eventValidation, "请输入关键字", "0", "0", "0",
                "CN=年度施工投标人7,OU=1007,L=深圳市,ST=广东省,C=CN", "241EDFC1BA276AA7", "19", "13"
            });

        string tempCookie = string.Empty;
        html = this.ToolWebSite.GetHtmlByUrl("http://www.szjsjy.com.cn/HomePage.aspx", n, Encoding.UTF8, ref tempCookie);
        // Strip cookie attributes and spaces from the returned cookie text before reusing it.
        cookiestr = tempCookie.Replace("path=/;", "").Replace("HttpOnly,", "").Replace("HttpOnly", "").Replace(" ", "");
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return list;
    }

    // The page count is read from the last grid row (the pager row).
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));
    if (pageNode != null && pageNode.Count > 0)
    {
        TableTag pagerTable = pageNode[0] as TableTag;
        try
        {
            string temp = pagerTable.Rows[pagerTable.RowCount - 1].ToNodePlainString().GetRegexBegEnd(",共", "页");
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Keep the single-page default.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            viewState = this.ToolWebSite.GetAspNetViewState(html);
            eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[]
                {
                    "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED",
                    "__EVENTVALIDATION", "ctl00$Content$drpSearchType", "ctl00$Content$txtQymc",
                    "ctl00$Content$hdnOperate", "ctl00$hdnPageCount"
                },
                new string[]
                {
                    "ctl00$Content$GridView1", "Page$" + i, viewState, "", eventValidation,
                    "0", "", "", pageInt.ToString()
                });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));
        if (listNode == null || listNode.Count <= 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // The first and the last row are excluded from the data rows.
        for (int j = 1; j < table.RowCount - 1; j++)
        {
            TableRow tr = table.Rows[j];
            string code = tr.Columns[1].ToNodePlainString();
            string prjName = tr.Columns[2].ToNodePlainString();
            string PublicityEndDate = tr.Columns[3].ToPlainTextString();
            string beginDate = DateTime.Now.ToString();
            string InfoUrl = "http://www.szjsjy.com.cn/BusinessInfo/" + tr.Columns[4].GetATagHref();

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8, ref cookiestr).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ContentContainer")));
            if (dtlNode == null || dtlNode.Count <= 0)
            {
                continue;
            }

            string HtmlTxt = dtlNode.AsHtml();
            string ctx = HtmlTxt.ToCtxString();
            string msgType = "深圳市建设工程交易中心";
            BidSituation info = ToolDb.GetBidSituation("广东省", "深圳市工程", "", code, prjName,
                PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);

            sqlCount++;
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return list;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
            {
                continue;
            }

            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode == null || aNode.Count <= 0)
            {
                continue;
            }

            for (int d = 0; d < aNode.Count; d++)
            {
                // Index by the loop variable. The original always read aNode[0], so only the
                // first anchor was ever processed, once per anchor on the page.
                ATag aTag = aNode[d] as ATag;
                if (aTag == null || !aTag.IsAtagAttach())
                {
                    continue;
                }

                string url = "http://www.szjsjy.com.cn/" + aTag.Link.Replace("../", "");
                BaseAttach attach = null;
                try
                {
                    attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                    if (attach == null)
                    {
                        // Second attempt with the same arguments, as in the original.
                        // NOTE(review): sibling crawlers fall back to GetBaseAttachByUrl here - confirm intent.
                        attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                    }
                }
                catch
                {
                    // Best effort: a failed download leaves attach null and is skipped.
                }

                if (attach != null)
                {
                    ToolDb.SaveEntity(attach, string.Empty);
                }
            }
        }
    }

    return list;
}
// Crawls the bid-opening (BidSituation) listing served under
// http://jyzx.cb.gov.cn/LGjyzxWeb/: posts the Login1 form on Index.aspx, reads
// the page count from the "inside_table3_bottom" table, walks every result page,
// saves each record through ToolDb and downloads the attachments linked from the
// record's detail HTML.
//
// crawlAll: when false, the crawl stops as soon as MaxCount records were built.
// Returns the result list; nothing is added to it here because records are
// persisted directly with ToolDb.SaveEntity.
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidSituation>();
    string html = string.Empty;
    string cookiestr = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    string tempCookie = string.Empty;
    int pageInt = 1, sqlCount = 0;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl("http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/Index.aspx", Encoding.UTF8, ref cookiestr);
        viewState = this.ToolWebSite.GetAspNetViewState(html);
        eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
        NameValueCollection loginForm = this.ToolWebSite.GetNameValueCollection(
            new string[] { "ctl00$ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "ctl00$cph_context$Login1$hfCertTitle", "ctl00$cph_context$DropDownList1", "ctl00$cph_context$DropDownList2", "select3", "textfield", "ctl00$cph_context$Login1$btnLogin.x", "ctl00$cph_context$Login1$btnLogin.y" },
            new string[] { "ctl00$cph_context$Login1$upLogin|ctl00$cph_context$Login1$btnLogin", "", "", viewState, "CN=年度施工投标人7,OU=1007,L=深圳市,ST=广东省,C=CN", "", "", "=全文检索=", "输入查询内容", "22", "8" });
        html = this.ToolWebSite.GetHtmlByUrl("http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/Index.aspx", loginForm, Encoding.UTF8, ref tempCookie);
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref tempCookie);
    }
    catch
    {
        return list;
    }

    // Page count is parsed out of the pager text; on any failure a single page is assumed.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table3_bottom")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd(",共", "页");
            pageInt = int.Parse(temp);
        }
        catch { }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Pages after the first are requested with the "btnNext" postback built from the previous page's view state.
            viewState = this.ToolWebSite.GetAspNetViewState(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "ctl00$ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "ctl00$cph_context$KBQKGSList$ddlSearchType", "ctl00$cph_context$KBQKGSList$txtQymc", "ctl00$cph_context$KBQKGSList$GridViewPaging1$txtGridViewPagingForwardTo", "__VIEWSTATEENCRYPTED", "ctl00$cph_context$KBQKGSList$GridViewPaging1$btnNext.x", "ctl00$cph_context$KBQKGSList$GridViewPaging1$btnNext.y" },
                new string[] { "ctl00$cph_context$KBQKGSList$UpdatePanel2|ctl00$cph_context$KBQKGSList$GridViewPaging1$btnNext", "", "", viewState, "A.Gcbh", "", (i - 1).ToString(), "", "5", "6" });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref tempCookie);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_KBQKGSList_GridView1")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        // Row 0 is skipped (the loop has always started at 1).
        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            string code = tr.Columns[1].ToNodePlainString();
            string prjName = tr.Columns[2].ToNodePlainString();
            string PublicityEndDate = tr.Columns[3].ToPlainTextString();
            string beginDate = DateTime.Now.ToString();
            string InfoUrl = "http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/" + tr.Columns[4].GetATagHref();

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8, ref tempCookie).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "listtable")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtlNode.AsHtml();
            string ctx = HtmlTxt.ToCtxString();
            string msgType = "深圳市建设工程交易中心龙岗分中心";
            BidSituation info = ToolDb.GetBidSituation("广东省", "深圳龙岗区工程", "龙岗区", code, prjName, PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);

            sqlCount++;
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return list;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
            {
                continue;
            }

            if (this.ExistsUpdate)
            {
                // NOTE(review): InfoUrl comes from crawled HTML and is concatenated into SQL;
                // switch to a parameterized query if ToolDb offers one.
                object id = ToolDb.ExecuteScalar(string.Format("select Id from BidSituation where InfoUrl='{0}'", info.InfoUrl));
                if (id != null)
                {
                    string sql = string.Format("delete from BaseAttach where SourceID='{0}'", id);
                    ToolDb.ExecuteSql(sql);
                }
            }

            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode == null || aNode.Count == 0)
            {
                continue;
            }

            for (int d = 0; d < aNode.Count; d++)
            {
                // Fixed: the loop used to read aNode[0] on every pass, so only the
                // first anchor was ever considered (and was downloaded repeatedly).
                ATag aTag = aNode[d] as ATag;
                if (aTag == null || !aTag.IsAtagAttach())
                {
                    continue;
                }

                string url = "http://jyzx.cb.gov.cn/LGjyzxWeb/" + aTag.Link.Replace("../", "");
                BaseAttach attach = null;
                try
                {
                    attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                    if (attach == null)
                    {
                        // Second attempt with the same arguments, as in the original code.
                        attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                    }
                }
                catch { }

                if (attach != null)
                {
                    ToolDb.SaveEntity(attach, string.Empty);
                }
            }
        }
    }

    return list;
}
// Crawls bid-opening (BidSituation) records from the JSON listing at
// SiteUrl + MaxCount. For each row the detail HTML is fetched from
// queryOldOTDataDetail.do; when that is blank the kbJiLu endpoints are used
// instead. Each record is saved through ToolDb and its attachments are
// downloaded either from the file-group JSON or from anchors in the detail HTML.
//
// crawlAll: when false, the crawl stops as soon as MaxCount records were built.
// Returns null when the listing request itself fails (unchanged behaviour),
// otherwise the result list (nothing is added to it in this method).
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidSituation>();
    int sqlCount = 0;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
    }
    catch
    {
        return null;
    }

    // The response is trimmed to its outermost {...}; bail out when there is none
    // instead of letting Substring throw.
    if (string.IsNullOrEmpty(html))
    {
        return list;
    }
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(html);
    object[] objvalues = smsTypeJson["rows"] as object[];
    if (objvalues == null)
    {
        return list;
    }

    foreach (object objValue in objvalues)
    {
        Dictionary<string, object> dic = (Dictionary<string, object>)objValue;
        string PublicityEndDate = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty;
        string code = Convert.ToString(dic["bdBH"]);
        string prjName = Convert.ToString(dic["bdName"]);
        string beginDate = Convert.ToString(dic["faBuTime2"]);
        string idt = Convert.ToString(dic["bdGuid"]);
        string InfoUrl = Convert.ToString(dic["detailUrl"]);
        string attachJson = string.Empty;

        try
        {
            string urll = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=5&id=" + idt;
            HtmlTxt = this.ToolWebSite.GetHtmlByUrl(urll).GetJsString().GetReplace("\\t,\\r,\\n,\"");
            if (string.IsNullOrWhiteSpace(HtmlTxt))
            {
                string kdGuid = Convert.ToString(dic["kbJiLuGuid"]);
                InfoUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=" + kdGuid;
                HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl);
                string url = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/querykbJiLuDetail.do?ggGuid=&bdGuid=&kbJiLuGuid=" + kdGuid;
                attachJson = this.ToolWebSite.GetHtmlByUrl(url);
            }
        }
        catch
        {
            continue;
        }

        string attachFileGroupGuid = string.Empty;
        if (!string.IsNullOrWhiteSpace(attachJson))
        {
            // A malformed detail payload or a failed view request now skips this
            // record instead of aborting the whole crawl.
            try
            {
                JavaScriptSerializer newSerializer = new JavaScriptSerializer();
                Dictionary<string, object> newTypeJson = (Dictionary<string, object>)newSerializer.DeserializeObject(attachJson);
                Dictionary<string, object> kdInfo = (Dictionary<string, object>)newTypeJson["kbJiLu"];
                try
                {
                    // Both lookups share one try on purpose: a missing "kbJiLuGuid"
                    // key leaves attachFileGroupGuid empty, exactly as before.
                    string attachId = Convert.ToString(kdInfo["kbJiLuGuid"]);
                    attachFileGroupGuid = Convert.ToString(kdInfo["attachFileGroupGuid"]);
                }
                catch { }
                string gcLeixing = Convert.ToString(kdInfo["gcLeiXing"]);
                string jywTime = Convert.ToString(kdInfo["jywFaBuEndTime"]);

                // Sample view URL: https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=9cb75eb8-66b6-441c-9686-471dfa357ff5
                // NOTE(review): the query parameter is named kbJiLuGuid but receives
                // attachFileGroupGuid — confirm this is intended.
                string surl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=" + attachFileGroupGuid;
                attachJson = this.ToolWebSite.GetHtmlByUrl(surl);
                HtmlTxt = attachJson;

                Parser parserNew = new Parser(new Lexer(HtmlTxt));
                NodeList tableNode = parserNew.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "de_tab1")));
                if (tableNode != null && tableNode.Count > 0)
                {
                    // Fill the empty template cells with the values taken from the JSON.
                    HtmlTxt = tableNode.AsHtml();
                    HtmlTxt = HtmlTxt.GetReplace("<td id=\"bdBH\"> </td>", "<td id=\"bdBH\"> " + code + "</td>");
                    HtmlTxt = HtmlTxt.GetReplace("<td id=\"bdName\"> </td>", "<td id=\"bdName\"> " + prjName + "</td>");
                    HtmlTxt = HtmlTxt.GetReplace("<td id=\"gcLeiXing\"> </td>", "<td id=\"gcLeiXing\"> " + gcLeixing + "</td>");
                    HtmlTxt = HtmlTxt.GetReplace("<td id=\"jieZhiTime\"> </td>", "<td id=\"jieZhiTime\"> " + jywTime + "</td>");
                    ctx = HtmlTxt.Replace("</tr>", "\r\n").ToCtxString();
                }
            }
            catch
            {
                continue;
            }
        }

        // Fixed: this fallback used to run unconditionally and discarded the
        // row-separated text that was just built from the de_tab1 table.
        if (string.IsNullOrEmpty(ctx))
        {
            ctx = HtmlTxt.ToCtxString();
        }

        string msgType = "深圳市建设工程交易中心宝安分中心";
        BidSituation info = ToolDb.GetBidSituation("广东省", "深圳宝安区工程", "宝安区", code, prjName, PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);

        sqlCount++;
        if (!crawlAll && sqlCount >= this.MaxCount)
        {
            return list;
        }

        if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
        {
            continue;
        }

        if (!string.IsNullOrWhiteSpace(attachFileGroupGuid))
        {
            // Attachments are listed by the file-group endpoint.
            string moJson = string.Empty;
            string sUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + attachFileGroupGuid;
            try
            {
                moJson = this.ToolWebSite.GetHtmlByUrl(sUrl);
            }
            catch { }

            if (string.IsNullOrWhiteSpace(moJson))
            {
                continue;
            }

            object[] objs = null;
            try
            {
                JavaScriptSerializer newSerializers = new JavaScriptSerializer();
                Dictionary<string, object> mofo = (Dictionary<string, object>)newSerializers.DeserializeObject(moJson);
                objs = (object[])mofo["rows"];
            }
            catch { }

            if (objs == null)
            {
                continue;
            }

            foreach (object objAttach in objs)
            {
                // Guarded per attachment, matching the anchor-based branch below.
                try
                {
                    Dictionary<string, object> attachs = (Dictionary<string, object>)objAttach;
                    string attachguid = Convert.ToString(attachs["attachGuid"]);
                    string attachName = Convert.ToString(attachs["attachName"]);
                    string link = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachguid;
                    BaseAttach attach = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                    if (attach != null)
                    {
                        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                    }
                }
                catch
                {
                    continue;
                }
            }
        }
        else
        {
            // No file group: fall back to every anchor found in the detail HTML.
            Parser parser = new Parser(new Lexer(HtmlTxt));
            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (fileNode == null || fileNode.Count == 0)
            {
                continue;
            }

            for (int f = 0; f < fileNode.Count; f++)
            {
                ATag tag = fileNode[f] as ATag;
                try
                {
                    string link = string.Empty;
                    if (tag.Link.ToLower().Contains("http"))
                    {
                        link = tag.Link;
                        if (link.Contains("\\"))
                        {
                            link = link.Replace("\\", "");
                        }
                    }
                    else
                    {
                        link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                    }

                    BaseAttach attach = ToolHtml.GetBaseAttach(link, tag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                    if (attach != null)
                    {
                        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                    }
                }
                catch
                {
                    continue;
                }
            }
        }
    }

    return list;
}
// Crawls the notice listing under http://www.yantian.gov.cn/icatalog/qzf/08/tzgg/:
// reads the page count from the last option of the first <select>, walks every
// page, saves each notice through ToolDb and downloads linked attachments (or,
// when the content has no anchors at all, its images).
//
// crawlAll: when false, the crawl stops as soon as MaxCount records were built.
// Returns the result list (nothing is added to it here). Note that the MaxCount
// cut-off returns null rather than the list; that is kept unchanged for callers.
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NotifyInfo>();
    int pageInt = 1, sqlCount = 0;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("select"));
    if (pageNode != null && pageNode.Count > 0)
    {
        SelectTag selTag = pageNode[0] as SelectTag;
        try
        {
            string temp = selTag.OptionTags[selTag.OptionTags.Length - 1].OptionText;
            pageInt = int.Parse(temp);
        }
        catch { }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.yantian.gov.cn/icatalog/qzf/08/tzgg/index_" + (i - 1).ToString() + ".shtml");
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        // Row 0 is skipped (the loop has always started at 1).
        for (int j = 1; j < table.RowCount; j++)
        {
            string infoScorce = string.Empty;
            TableRow tr = table.Rows[j];
            ATag aTag = tr.Columns[2].GetATag();
            if (aTag == null)
            {
                // A row without a link cannot be resolved to a detail page.
                continue;
            }

            string headName = aTag.GetAttribute("title");
            string releaseTime = tr.Columns[3].ToPlainTextString().GetDateRegex();
            string infoUrl = "http://www.yantian.gov.cn" + aTag.Link;

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string ctxHtml = dtlNode.AsHtml();
            string infoCtx = ctxHtml.ToCtxString();
            string msgType = "深圳市盐田区政府采购中心";
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "深圳区及街道工程", "盐田区", infoCtx, "通知公告");

            sqlCount++;
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                continue;
            }

            parser = new Parser(new Lexer(ctxHtml));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag fileATag = aNode[k].GetATag();
                    if (fileATag == null || !fileATag.IsAtagAttach())
                    {
                        continue;
                    }

                    BaseAttach obj = null;
                    try
                    {
                        if (fileATag.Link.ToLower().Contains("http"))
                        {
                            obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                        }
                        else
                        {
                            obj = ToolHtml.GetBaseAttach("http://www.yantian.gov.cn/" + fileATag.Link, headName, info.Id);
                        }
                    }
                    catch { }

                    if (obj != null)
                    {
                        ToolDb.SaveEntity(obj, string.Empty);
                    }
                }
            }
            else
            {
                // No anchors at all: store the content's images as attachments instead.
                parser.Reset();
                NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                if (imgNode != null && imgNode.Count > 0)
                {
                    for (int k = 0; k < imgNode.Count; k++)
                    {
                        // Fixed: the loop used to read imgNode[0] on every pass, so the
                        // first image was saved repeatedly and the others never.
                        ImageTag img = imgNode[k] as ImageTag;
                        BaseAttach obj = null;
                        try
                        {
                            if (img.ImageURL.ToLower().Contains("http"))
                            {
                                obj = ToolHtml.GetBaseAttach(img.ImageURL, headName, info.Id);
                            }
                            else
                            {
                                obj = ToolHtml.GetBaseAttach("http://www.yantian.gov.cn/" + img.ImageURL, headName, info.Id);
                            }
                        }
                        catch { }

                        if (obj != null)
                        {
                            ToolDb.SaveEntity(obj, string.Empty);
                        }
                    }
                }
            }
        }
    }

    return list;
}
// Crawls the evaluation-report listing under http://www.gdzbtb.gov.cn/pbbgbd/:
// reads the page count from the "cn6" cell, walks every page, builds a
// BidProject per list item, saves it through ToolDb and downloads the single
// attachment referenced by the "#pbbg_shongti" script fragment of the detail page.
//
// crawlAll: when false, the crawl stops once MaxCount records were processed.
// Returns the result list (nothing is added to it here; records are persisted
// directly).
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string html = string.Empty;
    int sqlCount = 0;
    int pageInt = 1;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "cn6")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode.AsString().Replace("(", "kdxx").GetRegexBegEnd("kdxx", ",");
            pageInt = int.Parse(temp);
        }
        catch { }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.gdzbtb.gov.cn/pbbgbd/pingbiaobaogao_" + (i - 1).ToString() + ".htm", Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "position2")), true), new TagNameFilter("li")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < nodeList.Count; j++)
        {
            string bCity = string.Empty, bPrjno = string.Empty, bBidresultendtime = string.Empty, bBaseprice = string.Empty,
                   bBuildunit = string.Empty, bBidmethod = string.Empty, bRemark = string.Empty;

            string bPrjname = nodeList[j].GetATagValue("title");
            if (string.IsNullOrEmpty(bPrjname))
            {
                // Without a title there is nothing to derive the project name or city from.
                continue;
            }

            if (bPrjname.Contains("广东省"))
            {
                bCity = "广州市区";
                bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "").Replace("广东省", "");
            }
            else
            {
                // The text between '[' and ']' is taken as the city prefix of the title.
                string temp = bPrjname.Replace("[", "kdxx").Replace("]", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "");
                // Fixed: string.Replace throws on an empty search value, which used
                // to abort the whole crawl for titles without a [...] prefix.
                if (!string.IsNullOrEmpty(temp))
                {
                    bPrjname = bPrjname.Replace(temp, "");
                    bCity = temp + "区";
                }
            }

            string bInfourl = "http://www.gdzbtb.gov.cn/pbbgbd/" + nodeList[j].GetATagHref().Replace("../", "").Replace("./", "");
            string htldtl = string.Empty;
            try
            {
                htldtl = this.ToolWebSite.GetHtmlByUrl(bInfourl, Encoding.Default);
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string htmlTxt = dtlNode.AsHtml();
            string bBiddate = htmlTxt.GetDateRegex();
            if (string.IsNullOrEmpty(bBiddate))
            {
                bBiddate = DateTime.Now.ToString("yyyy-MM-dd");
            }

            // The attachment anchor sits between the "#pbbg_shongti" script marker and the next "</a>".
            string attachUrl = string.Empty;
            string attachName = string.Empty;
            int len1 = htldtl.IndexOf("$(\"#pbbg_shongti\")");
            // Fixed: the closing tag is now searched from the marker onwards; searching
            // from offset 0 could find an earlier "</a>" and make Substring throw.
            int len2 = len1 > 0 ? htldtl.IndexOf("</a>", len1) : -1;
            if (len1 > 0 && len2 > len1)
            {
                string aurl = htldtl.Substring(len1, len2 - len1) + "</a>";
                parser = new Parser(new Lexer(aurl));
                NodeList atagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (atagNode != null && atagNode.Count > 0)
                {
                    ATag aTag = atagNode.GetATag();
                    attachUrl = aTag.Link;
                    attachName = aTag.LinkText;
                }
            }
            if (string.IsNullOrEmpty(attachName))
            {
                attachName = bPrjname;
            }

            BidProject info = ToolDb.GenResultProject("广东省", bCity, "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return list;
            }
            sqlCount++;

            // NOTE(review): InfoUrl originates from crawled HTML and is concatenated
            // into SQL; switch to a parameterized query if ToolDb offers one.
            string sql = string.Format("select Id from BidProject where 1=1 and InfoUrl='{0}'", info.InfoUrl);
            string result = Convert.ToString(ToolDb.ExecuteScalar(sql));
            bool alreadyStored = !string.IsNullOrEmpty(result);

            // An existing record honours ExistsUpdate; a new one is saved with the compare fields only.
            bool saved = alreadyStored
                ? ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate)
                : ToolDb.SaveEntity(info, this.ExistCompareFields);
            if (!saved || string.IsNullOrEmpty(attachUrl))
            {
                continue;
            }

            // The download URL includes a yyyyMM folder derived from the bid date.
            string fileUrl = string.Empty;
            try
            {
                fileUrl = DateTime.Parse(bBiddate).ToString("yyyyMM");
            }
            catch
            {
                fileUrl = DateTime.Now.ToString("yyyyMM");
            }
            string alink = "http://www.gdzbtb.gov.cn/pbbgbd/" + fileUrl + "/" + attachUrl.Replace("../", "").Replace("./", "");

            BaseAttach attach = null;
            try
            {
                attach = ToolHtml.GetBaseAttach(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                if (attach == null)
                {
                    attach = ToolHtml.GetBaseAttachByUrl(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                }
            }
            catch { }

            if (attach != null)
            {
                if (alreadyStored)
                {
                    // Replace the attachments stored for the existing record.
                    string sqlDelete = string.Format("delete from BaseAttach where SourceId='{0}'", result);
                    ToolDb.ExecuteSql(sqlDelete);
                }
                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
            }
        }
    }

    return list;
}
// Crawls the <xml> feed embedded in SiteUrl (dongcheng.dg.gov.cn). Each feed
// entry is classified by its page title: titles containing "中标" become BidInfo,
// titles containing "通知" become NoticeInfo, everything else becomes InviteInfo.
// Entities are added to the returned list; attachment links are queued in
// base.AttachList and the first content image (notice/invite) is downloaded.
//
// crawlAll: when false, the crawl stops once the list holds MaxCount entities.
// Returns the list of BidInfo / NoticeInfo / InviteInfo entities built.
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    // Guard the slice: a page without the <xml>...</xml> island used to make Substring throw.
    if (string.IsNullOrEmpty(html))
    {
        return list;
    }
    int startIndex = html.IndexOf("<xml");
    int endIndex = html.IndexOf("</xml>");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }

    // The feed's custom tags are renamed to HTML tags so the HTML parser can filter them.
    string xmlstr = html.Substring(startIndex, endIndex - startIndex).ToLower().GetReplace("infourl", "span").GetReplace("info", "div").GetReplace("publishedtime", "p");
    Parser parser = new Parser(new Lexer(xmlstr));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("div"));
    if (pageNode == null || pageNode.Count == 0)
    {
        return list;
    }

    for (int i = 0; i < pageNode.Count; i++)
    {
        parser = new Parser(new Lexer(pageNode[i].ToHtml()));
        NodeList dateNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("p"));
        parser.Reset();
        NodeList urlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("span"));
        if (dateNode == null || dateNode.Count == 0 || urlNode == null || urlNode.Count == 0)
        {
            // Entry lacks its date or URL element; skip it instead of indexing an empty list.
            continue;
        }

        string beginDate = dateNode[0].ToPlainTextString().GetDateRegex();
        string infoUrl = "http://dongcheng.dg.gov.cn/publicfiles//business/htmlfiles/" + urlNode[0].ToPlainTextString();

        string htmldtl = string.Empty;
        try
        {
            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
        }
        catch
        {
            continue;
        }

        parser = new Parser(new Lexer(htmldtl));
        NodeList titleNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("title"));
        if (titleNode == null || titleNode.Count == 0)
        {
            continue;
        }

        string prjName = titleNode[0].ToNodePlainString();
        if (prjName.Contains("_"))
        {
            prjName = prjName.Remove(prjName.IndexOf("_"));
        }

        if (prjName.Contains("中标"))
        {
            string endDate = string.Empty, otherType = string.Empty;
            parser.Reset();
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                string HtmlTxt = dtlNode.AsHtml();
                string bidCtx = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();
                string buildUnit = bidCtx.GetBuildRegex();
                string bidUnit = bidCtx.GetBidRegex();
                string bidMoney = bidCtx.GetRegex("中标值").GetMoney();
                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                {
                    bidMoney = bidCtx.GetMoneyRegex();
                }
                string prjMgr = bidCtx.GetMgrRegex();
                string code = bidCtx.GetCodeRegex();
                string specType = "政府采购";
                string bidType = prjName.GetInviteBidType();
                string msgType = "东莞市东城区办事处";
                BidInfo info = ToolDb.GenBidInfo("广东省", "东莞市区", "东城区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, HtmlTxt);
                list.Add(info);

                parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k].GetATag();
                        if (a != null && a.IsAtagAttach())
                        {
                            string link = a.Link.ToLower().Contains("http") ? a.Link : "http://dongcheng.dg.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                }
            }
        }
        else if (prjName.Contains("通知"))
        {
            string InfoType = string.Empty, prjCode = string.Empty, buildUnit = string.Empty;
            parser.Reset();
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                string InfoTitle = prjName;
                string PublistTime = beginDate;
                string htmlTxt = dtlNode.AsHtml();

                // Rewrite the first image's src to an absolute URL (the HTML is lower-cased in the process).
                Parser imgParser = new Parser(new Lexer(htmlTxt.ToLower()));
                NodeList imgNode = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                string src = string.Empty;
                if (imgNode != null && imgNode.Count > 0)
                {
                    ImageTag firstImg = imgNode[0] as ImageTag;
                    string imgUrl = firstImg == null ? null : firstImg.GetAttribute("src");
                    if (!string.IsNullOrEmpty(imgUrl))
                    {
                        src = "http://dongcheng.dg.gov.cn/" + imgUrl;
                        htmlTxt = htmlTxt.ToLower().GetReplace(imgUrl, src);
                    }
                }

                string InfoCtx = htmlTxt.ToCtxString();
                NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "东莞市区", "东城区", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "东莞市东城区办事处", infoUrl, prjCode, buildUnit, string.Empty, string.Empty, "政府采购", string.Empty, htmlTxt);
                list.Add(info);

                if (!string.IsNullOrEmpty(src))
                {
                    // NOTE(review): this looks up the InviteInfo table although the entity
                    // is a NoticeInfo — possibly a copy/paste from the invite branch; confirm.
                    string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", info.InfoUrl);
                    object obj = ToolDb.ExecuteScalar(sql);
                    if (obj == null || obj.ToString() == "")
                    {
                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "");
                            }
                        }
                        catch { }
                    }
                }

                parser = new Parser(new Lexer(htmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k].GetATag();
                        if (a != null && a.IsAtagAttach())
                        {
                            string link = a.Link.ToLower().Contains("http") ? a.Link : "http://dongcheng.dg.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                }
            }
        }
        else
        {
            string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;
            parser.Reset();
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                // Only the first matching cell is used here (the other branches join all matches).
                string HtmlTxt = dtlNode[0].ToHtml();

                Parser imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                NodeList imgNode = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                string src = string.Empty;
                if (imgNode != null && imgNode.Count > 0)
                {
                    ImageTag firstImg = imgNode[0] as ImageTag;
                    string imgUrl = firstImg == null ? null : firstImg.GetAttribute("src");
                    if (!string.IsNullOrEmpty(imgUrl))
                    {
                        src = "http://dongcheng.dg.gov.cn/" + imgUrl;
                        HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                    }
                }

                string inviteCtx = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();
                string buildUnit = inviteCtx.GetBuildRegex();
                string prjAddress = inviteCtx.GetAddressRegex();
                string code = inviteCtx.GetCodeRegex();
                string specType = "政府采购";
                string inviteType = prjName.GetInviteBidType();
                string msgType = "东莞市东城区办事处";
                InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "东城区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, HtmlTxt);
                list.Add(info);

                if (!string.IsNullOrEmpty(src))
                {
                    // The image is downloaded only when no InviteInfo row exists yet for this URL.
                    string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", info.InfoUrl);
                    object obj = ToolDb.ExecuteScalar(sql);
                    if (obj == null || obj.ToString() == "")
                    {
                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "");
                            }
                        }
                        catch { }
                    }
                }

                parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k].GetATag();
                        if (a != null && a.IsAtagAttach())
                        {
                            string link = a.Link.ToLower().Contains("http") ? a.Link : "http://dongcheng.dg.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                }
            }
        }

        if (!crawlAll && list.Count >= this.MaxCount)
        {
            return list;
        }
    }

    return list;
}
// Crawls the invitation listing (GridView "ctl00_Content_GridView1") at SiteUrl:
// reads the page count from the pager text, walks every page via ASP.NET
// postbacks, builds an InviteInfo per row from the detail page's "lblXXNR" span,
// saves it through ToolDb and downloads the attachments linked from that span.
//
// crawlAll: when false, the crawl stops once MaxCount records were processed.
// Returns the result list (nothing is added to it here). Note that the MaxCount
// cut-off returns null rather than the list; that is kept unchanged for callers.
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int sqlCount = 0;
    // Page count; defaults to a single page when the pager cannot be parsed.
    int pageInt = 1;
    string html = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    try
    {
        html = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
    }
    catch (Exception ex)
    {
        Logger.Error(ex.ToString());
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));
    if (sNode != null && sNode.Count > 0)
    {
        string pageString = sNode.AsString();
        Regex regexPage = new Regex(@",共[^页]+页,");
        Match pageMatch = regexPage.Match(pageString);
        try
        {
            pageInt = int.Parse(pageMatch.Value.Replace(",共", "").Replace("页,", "").Trim());
        }
        catch (Exception) { }
    }

    // Built once instead of once per row; the patterns are unchanged.
    Regex regPrjAdd = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
    Regex regInvType = new Regex(@"[^\r\n]+[\r\n]{1}");
    // 2013-11-19 change: text from these markers onwards is cut off the content.
    string[] cutMarkers = new string[] { "重要提示", "温馨提示" };
    Regex[] cutRegexes = new Regex[] { new Regex(@"([.\S\s]*)(?=重要提示)"), new Regex(@"([.\S\s]*)(?=温馨提示)") };

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            viewState = this.ToolWebSite.GetAspNetViewState(html);
            eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "ctl00$hdnPageCount" },
                new string[] { "ctl00$Content$GridView1", "Page$" + i.ToString(), viewState, "", eventValidation, pageInt.ToString() });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                // Fixed: a failed page request used to abort the whole crawl.
                Logger.Error(ex.ToString());
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        TableTag table = nodeList[0] as TableTag;
        // The first and the last row are skipped (the loop bounds are unchanged).
        for (int j = 1; j < table.RowCount - 1; j++)
        {
            string remark = string.Empty, HtmlTxt = string.Empty;
            TableRow tr = table.Rows[j] as TableRow;
            string code = tr.Columns[1].ToPlainTextString().Trim();
            string prjName = tr.Columns[2].ToPlainTextString().Trim();
            string buildUnit = tr.Columns[3].ToPlainTextString().Trim();
            string beginDate = tr.Columns[5].ToPlainTextString().Trim();
            string endDate = tr.Columns[6].ToPlainTextString().Trim();

            ATag aTag = tr.Columns[2].Children[0] as ATag;
            if (aTag == null)
            {
                // The first child of the name cell is not a link; no detail page to follow.
                continue;
            }
            string InfoUrl = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;

            string htmldetail = string.Empty;
            try
            {
                // The detail page is now downloaded once; both derived strings are
                // built from the same response (it used to be requested twice).
                string rawDetail = ToolHtml.GetHtmlByUrlEncode(InfoUrl, Encoding.UTF8).Replace(" ", "");
                Parser dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                NodeList dtnodeHTML = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
                HtmlTxt = dtnodeHTML.AsHtml();
                htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
            }
            catch
            {
                continue;
            }

            Parser dtlparser = new Parser(new Lexer(htmldetail));
            NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
            string inviteCtx = dtnode.AsString().Replace(" ", "");

            string prjAddress = regPrjAdd.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
            string msgType = "深圳市建设工程交易中心";
            string specType = "建设工程";
            // The first line of the content is used to classify the invitation type.
            string InvType = regInvType.Match(inviteCtx).Value;
            string inviteType = ToolHtml.GetInviteTypes(InvType);

            for (int m = 0; m < cutMarkers.Length; m++)
            {
                if (inviteCtx.Contains(cutMarkers[m]))
                {
                    inviteCtx = cutRegexes[m].Match(inviteCtx).Value;
                }
            }

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳市工程", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, InfoUrl, HtmlTxt);
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
            sqlCount++;

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
            {
                continue;
            }

            // dtlparser.Reset() is kept before re-querying; the anchors come from the
            // nodes left by the "lblXXNR" filter above.
            dtlparser.Reset();
            NodeList dlNodes = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (dlNodes == null || dlNodes.Count == 0)
            {
                continue;
            }

            for (int f = 0; f < dlNodes.Count; f++)
            {
                ATag fileTag = dlNodes[f] as ATag;
                if (fileTag == null || !fileTag.IsAtagAttach())
                {
                    continue;
                }

                try
                {
                    BaseAttach attach = ToolHtml.GetBaseAttach(fileTag.Link.Replace("..", "http://www.szjsjy.com.cn"), fileTag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                    if (attach != null)
                    {
                        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                    }
                }
                catch { }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged notice list at <c>SiteUrl</c> and stores each notice as a
/// <c>NotifyInfo</c>, together with the images and attachment links found in its body.
/// Detail and file links are resolved against <c>http://www.ciac.sh.cn/newsdata/</c>.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> notices have been processed.
/// </param>
/// <returns>Always <c>null</c>; every record is persisted through <c>ToolDb</c> as it is found.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string siteRoot = "http://www.ciac.sh.cn/newsdata/";

    // Work out how many list pages there are.
    int pageInt = 1, sqlCount = 0;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
    }
    catch
    {
        return null;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "PageListControl1$ctl06")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // The value of the last <option> of the page selector is used as the page count.
            SelectTag tag = pageNode[0] as SelectTag;
            string temp = tag.OptionTags[tag.OptionTags.Length - 1].Value;
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Selector unreadable: keep the single-page default.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Post back the ASP.NET state of the page currently held, plus the pager fields.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__VIEWSTATE", "__EVENTVALIDATION", "PageListControl1$ctl03", "PageListControl1$ctl06", "select2" },
                new string[] { viewState, eventValidation, "下一页", (i - 1).ToString(), "** 站点链接 **" });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "Listbody")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        // The last table row is deliberately not visited (loop bound is RowCount - 1).
        for (int j = 0; j < table.RowCount - 1; j++)
        {
            string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty,
                   infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty;
            string msgType = "上海市建筑业管理办公室";
            string infoType = "通知公告";

            TableRow tr = table.Rows[j];
            ATag aTag = tr.Columns[0].GetATag();
            if (aTag == null)
            {
                // Row without a link: nothing to crawl (previously a NullReferenceException).
                continue;
            }

            headName = aTag.LinkText.GetReplace("·, ");
            releaseTime = tr.Columns[1].ToPlainTextString().GetDateRegex();
            // The target path is the quoted argument of the anchor's onclick handler.
            infoUrl = siteRoot + aTag.GetAttribute("onclick").GetRegexBegEnd("'", "'");

            if (infoUrl.IsAtagAttach())
            {
                // The notice itself is a downloadable file: store it with empty content
                // and attach the file directly.
                NotifyInfo fileInfo = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "上海市", "上海市区", string.Empty, infoCtx, infoType);
                sqlCount++;
                if (ToolDb.SaveEntity(fileInfo, this.ExistCompareFields, this.ExistsUpdate))
                {
                    try
                    {
                        BaseAttach entity = ToolHtml.GetBaseAttach(infoUrl, headName, fileInfo.Id);
                        if (entity != null)
                        {
                            ToolDb.SaveEntity(entity, string.Empty);
                        }
                    }
                    catch
                    {
                        // Best effort: a failed download must not stop the crawl.
                    }
                }
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return null;
                }
                continue;
            }

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "771")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            ctxHtml = dtlNode.AsHtml();
            if (headName.Contains("..."))
            {
                // The list shows a truncated title; prefer the full one from the detail page.
                parser = new Parser(new Lexer(ctxHtml));
                NodeList pNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("p"), new HasAttributeFilter("class", "bb")));
                if (pNode != null && pNode.Count > 0)
                {
                    string fullTitle = pNode[0].ToNodePlainString();
                    if (!string.IsNullOrEmpty(fullTitle))
                    {
                        headName = fullTitle;
                    }
                }
            }
            infoCtx = ctxHtml.ToCtxString();

            // Make image URLs absolute in the stored HTML and remember them for download.
            List<string> listImg = new List<string>();
            parser = new Parser(new Lexer(ctxHtml));
            NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
            if (imgNode != null && imgNode.Count > 0)
            {
                for (int m = 0; m < imgNode.Count; m++)
                {
                    string imageUrl = (imgNode[m] as ImageTag).ImageURL;
                    string absoluteImage = siteRoot + imageUrl;
                    listImg.Add(absoluteImage);
                    ctxHtml = ctxHtml.GetReplace(imageUrl, absoluteImage);
                }
            }

            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "上海市", "上海市区", string.Empty, infoCtx, infoType);
            sqlCount++;
            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                foreach (string imageLink in listImg)
                {
                    try
                    {
                        // Fix: download every collected image. The old loop passed
                        // listImg[0] on each pass, so only the first image was ever fetched.
                        BaseAttach entity = ToolHtml.GetBaseAttach(imageLink, headName, info.Id);
                        if (entity != null)
                        {
                            ToolDb.SaveEntity(entity, string.Empty);
                        }
                    }
                    catch
                    {
                        // Best effort: skip images that cannot be downloaded.
                    }
                }

                parser = new Parser(new Lexer(ctxHtml));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag fileTag = aNode[k].GetATag();
                        if (!fileTag.IsAtagAttach())
                        {
                            continue;
                        }

                        string link = fileTag.Link.ToLower().Contains("http")
                            ? fileTag.Link
                            : siteRoot + fileTag.Link;
                        try
                        {
                            BaseAttach entity = ToolHtml.GetBaseAttach(link, fileTag.LinkText, info.Id);
                            if (entity != null)
                            {
                                ToolDb.SaveEntity(entity, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: skip attachments that cannot be downloaded.
                        }
                    }
                }
            }

            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
        }
    }
    return null;
}
/// <summary>
/// Walks the paged announcement list at <c>SiteUrl</c>, opens every announcement and turns it
/// into a <c>BidInfo</c> (title contains 中标 / 成交 / 结果) or an <c>InviteInfo</c> (anything else).
/// Records are added to the returned list; linked files are queued on <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">When <c>false</c>, the crawl returns once the list holds <c>MaxCount</c> records.</param>
/// <returns>The collected records (empty when the first page cannot be fetched).</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string host = "http://huangbu.huidong.gov.cn/";

    IList list = new ArrayList();
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    // The page count is read from the second-to-last pager link.
    Parser parser = new Parser(new Lexer(html));
    NodeList pagerLinks = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "0h120")), true), new TagNameFilter("a")));
    if (pagerLinks != null && pagerLinks.Count > 0)
    {
        try
        {
            string pagerText = pagerLinks[pagerLinks.Count - 2].ToNodePlainString();
            pageInt = Convert.ToInt32(pagerText.GetReplace("[,]"));
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList entries = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "0h120")));
        if (entries == null || entries.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < entries.Count; j++)
        {
            TableTag entryTable = entries[j] as TableTag;
            ATag anchor = entries[j].GetATag();
            if (anchor == null)
            {
                continue;
            }

            string prjName = anchor.GetAttribute("title").Trim().GetReplace(" ");
            string beginDate = entryTable.ToNodePlainString().GetDateRegex();
            string InfoUrl = host + anchor.Link;

            string detailPage;
            try
            {
                detailPage = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailPage));
            NodeList body = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "fontzoom")));
            if (body == null || body.Count == 0)
            {
                continue;
            }

            string HtmlTxt = body.AsHtml();
            string endDate = string.Empty, otherType = string.Empty;
            string msgType = "惠东县黄埠镇人民政府";
            string specType = "政府采购";

            bool isResultNotice = prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果");
            if (isResultNotice)
            {
                // ---- award / result announcement -> BidInfo ----
                string prjMgr = string.Empty;
                string bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                string tempName = bidCtx.GetRegex("工程名称,项目名称");
                if (!string.IsNullOrEmpty(tempName))
                {
                    prjName = tempName;
                }

                string code = bidCtx.GetCodeRegex().GetCodeDel();
                string buildUnit = CutBuildUnitTail(bidCtx.GetBuildRegex());

                string bidUnit = bidCtx.GetBidRegex();
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = bidCtx.GetRegex("中标候选公司,中标候选人");
                }

                string bidMoney = bidCtx.GetMoneyRegex();
                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                {
                    string priceSpan = bidCtx.GetReplace("元\r\n,元;\r\n", "元kdxx").GetRegexBegEnd("中标价", "kdxx");
                    bidMoney = priceSpan.GetMoney("万元");
                }
                try
                {
                    // Amounts above 100000 are divided by 10000.
                    decimal amount = decimal.Parse(bidMoney);
                    if (amount > 100000)
                    {
                        bidMoney = (amount / 10000).ToString();
                    }
                }
                catch
                {
                    // Not a number: keep the extracted text untouched.
                }

                string src = AbsolutizeFirstImage(ref HtmlTxt);
                string bidType = prjName.GetInviteBidType();

                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);

                if (!string.IsNullOrEmpty(src))
                {
                    // Only fetch the image when no BidInfo row with this URL exists yet.
                    string sql = string.Format("select Id from BidInfo where InfoUrl='{0}'", info.InfoUrl);
                    object existing = ToolDb.ExecuteScalar(sql);
                    if (existing == null || existing.ToString() == "")
                    {
                        try
                        {
                            BaseAttach image = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (image != null)
                            {
                                ToolDb.SaveEntity(image, "");
                            }
                        }
                        catch
                        {
                        }
                    }
                }

                // Queue every attachment-like link found in the body.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList links = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (links != null && links.Count > 0)
                {
                    for (int k = 0; k < links.Count; k++)
                    {
                        ATag fileTag = links[k].GetATag();
                        if (!fileTag.IsAtagAttach())
                        {
                            continue;
                        }
                        string link = fileTag.Link.ToLower().Contains("http") ? fileTag.Link : host + fileTag.Link;
                        base.AttachList.Add(ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, link));
                    }
                }
            }
            else
            {
                // ---- tender / invitation announcement -> InviteInfo ----
                string remark = string.Empty;
                string inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                string tempName = inviteCtx.GetRegex("工程名称,项目名称");
                if (!string.IsNullOrEmpty(tempName))
                {
                    prjName = tempName;
                }

                string inviteType = prjName.GetInviteBidType();
                string code = inviteCtx.GetCodeRegex().GetCodeDel();
                string buildUnit = inviteCtx.GetBuildRegex();
                string prjAddress = inviteCtx.GetAddressRegex();
                buildUnit = CutBuildUnitTail(buildUnit);

                string src = AbsolutizeFirstImage(ref HtmlTxt);

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);

                if (!string.IsNullOrEmpty(src))
                {
                    // Only fetch the image when no InviteInfo row with this URL exists yet.
                    string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", info.InfoUrl);
                    object existing = ToolDb.ExecuteScalar(sql);
                    if (existing == null || existing.ToString() == "")
                    {
                        try
                        {
                            BaseAttach image = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (image != null)
                            {
                                ToolDb.SaveEntity(image, "");
                            }
                        }
                        catch
                        {
                        }
                    }
                }

                // Queue every attachment-like link found in the body.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList links = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (links != null && links.Count > 0)
                {
                    for (int k = 0; k < links.Count; k++)
                    {
                        ATag fileTag = links[k].GetATag();
                        if (!fileTag.IsAtagAttach())
                        {
                            continue;
                        }
                        string link = fileTag.Link.ToLower().Contains("http") ? fileTag.Link : host + fileTag.Link;
                        base.AttachList.Add(ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, link));
                    }
                }
            }

            // Same stop rule for both record kinds.
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}

/// <summary>
/// Cuts the extracted build-unit text at "招标代理" and then at the first "公司"
/// (keeping that "公司" suffix).
/// </summary>
private static string CutBuildUnitTail(string unit)
{
    if (unit.Contains("招标代理"))
    {
        unit = unit.Remove(unit.IndexOf("招标代理"));
    }
    if (unit.Contains("公司"))
    {
        unit = unit.Remove(unit.IndexOf("公司")) + "公司";
    }
    return unit;
}

/// <summary>
/// Looks for the first &lt;img&gt; in the lower-cased HTML. When one exists, the HTML is
/// replaced by its lower-cased form with that image's src made absolute, and the absolute
/// URL is returned; otherwise the HTML is left alone and an empty string is returned.
/// </summary>
private static string AbsolutizeFirstImage(ref string htmlTxt)
{
    Parser imgParser = new Parser(new Lexer(htmlTxt.ToLower()));
    NodeList images = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
    if (images == null || images.Count == 0)
    {
        return string.Empty;
    }

    string relative = (images[0] as ImageTag).GetAttribute("src");
    string absolute = "http://huangbu.huidong.gov.cn/" + relative;
    htmlTxt = htmlTxt.ToLower().GetReplace(relative, absolute);
    return absolute;
}
/// <summary>
/// Reads the JSON result listing served at <c>SiteUrl</c> and stores one <c>ProjectResult</c>
/// per listed entry. For each entry the detail HTML is fetched (or rebuilt from several JSON
/// endpoints), a bidder table and a file table are appended to it, and the entry's
/// attachments are downloaded after the record has been saved.
/// </summary>
/// <param name="crawlAll">When <c>false</c>, the crawl returns once <c>MaxCount</c> entries have been processed.</param>
/// <returns>
/// <c>null</c> when the listing cannot be fetched or holds no JSON object; otherwise the result
/// list, which this method never populates (records are persisted through <c>ToolDb</c>).
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<ProjectResult>();
    int sqlCount = 0;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (MaxCount + 20));
    }
    catch
    {
        return null;
    }

    // Keep only the span from the first '{' to the last '}' before deserializing.
    // Without braces Substring would throw, so treat that like a failed fetch.
    if (string.IsNullOrEmpty(html))
    {
        return null;
    }
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return null;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(html);
    foreach (KeyValuePair<string, object> obj in smsTypeJson)
    {
        if (obj.Key == "total")
        {
            continue;
        }

        object[] array = (object[])obj.Value;
        foreach (object arrValue in array)
        {
            string BuildUnit = string.Empty, FinalistsWay = string.Empty, RevStaMethod = string.Empty,
                   SetStaMethod = string.Empty, VoteMethod = string.Empty, RevStaDate = string.Empty,
                   ProjectCtx = string.Empty, HtmlTxt = string.Empty,
                   attachFileGroupGuid = string.Empty, jsonHtml = string.Empty;

            // Fix: attachments are collected per entry. The list used to be created once
            // for the whole crawl and never cleared, so every saved record was also given
            // the attachments of all entries processed before it.
            List<Dictionary<string, object>> dicFile = new List<Dictionary<string, object>>();

            Dictionary<string, object> dic = (Dictionary<string, object>)arrValue;
            string Code = Convert.ToString(dic["bdBH"]);
            string prjName = Convert.ToString(dic["bdName"]);
            string beginDate = Convert.ToString(dic["createTime2"]);
            string InfoUrl = Convert.ToString(dic["detailUrl"]);
            string dbJieGuoGuid = Convert.ToString(dic["dbJieGuoGuid"]);
            string ggGuid = Convert.ToString(dic["ggGuid"]);
            string bdGuid = Convert.ToString(dic["bdGuid"]);

            // First try the "old data" endpoint; it answers with ready-made HTML for some entries.
            string crawlUrl = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=9&id=" + Code;
            try
            {
                jsonHtml = this.ToolWebSite.GetHtmlByUrl(crawlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
            }
            catch
            {
            }

            if (string.IsNullOrEmpty(jsonHtml) || !jsonHtml.Contains("<div"))
            {
                // No HTML available: rebuild the detail view from the JSON endpoints.
                try
                {
                    if (string.IsNullOrEmpty(jsonHtml))
                    {
                        crawlUrl = "https://www.szjsjy.com.cn:8001/jyw/queryDbJieGuoByGuid.do?guid=" + dbJieGuoGuid;
                        jsonHtml = this.ToolWebSite.GetHtmlByUrl(crawlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"").GetReplace(":RMB:", ":");
                    }

                    // The payload is not always valid JSON; try progressively repaired variants.
                    string strHtml = PrjResultStr(jsonHtml);
                    Dictionary<string, object> resultJson = null;
                    try
                    {
                        resultJson = (Dictionary<string, object>)serializer.DeserializeObject(strHtml);
                    }
                    catch
                    {
                        try
                        {
                            strHtml = PrjResultStr(jsonHtml, true);
                            resultJson = (Dictionary<string, object>)serializer.DeserializeObject(strHtml);
                        }
                        catch
                        {
                            try
                            {
                                strHtml = GetPrjResultDtl(strHtml);
                                resultJson = (Dictionary<string, object>)serializer.DeserializeObject(strHtml);
                            }
                            catch
                            {
                            }
                        }
                    }

                    // A missing result or "bd" node throws here and is swallowed by the
                    // enclosing catch, which skips the rest of the reconstruction.
                    Dictionary<string, object> bd = (Dictionary<string, object>)resultJson["bd"];
                    Dictionary<string, object> gc = (Dictionary<string, object>)bd["gc"];

                    string ggShiXiangGuid = ReadJsonText(bd, "ggShiXiangGuid") ?? string.Empty;
                    string ggMc = ReadJsonText(bd, "bdName") ?? string.Empty;
                    string bdBh = ReadJsonText(bd, "bdBH") ?? string.Empty;
                    string dbBanFa = ReadJsonText(resultJson, "dbBanFa") ?? ReadJsonText(bd, "dbBanFa") ?? string.Empty;
                    string rwFangShi = ReadJsonText(resultJson, "rwFangShi") ?? string.Empty;
                    string zbName = ReadJsonText(resultJson, "zbName") ?? string.Empty;
                    string Lxr = ReadJsonText(gc, "jingBanRenName") ?? ReadJsonText(gc, "lianXiRenName") ?? string.Empty;
                    string LxDh = ReadJsonText(gc, "lianXiRenMobile") ?? ReadJsonText(gc, "lianXiRenPhone") ?? string.Empty;
                    string jsDw = ReadJsonText(gc, "zbRName") ?? string.Empty;
                    attachFileGroupGuid = ReadJsonText(resultJson, "attachFileGroupGuid") ?? attachFileGroupGuid;

                    string dbTime = string.Empty;
                    string rawDbTime = ReadJsonText(resultJson, "dbTime");
                    if (rawDbTime != null)
                    {
                        dbTime = rawDbTime;
                        try
                        {
                            dbTime = ToolHtml.GetDateTimeByLong(long.Parse(rawDbTime)).ToString();
                        }
                        catch
                        {
                            // Not convertible: keep the raw value.
                        }
                    }

                    if (dbBanFa.IsNumber())
                    {
                        dbBanFa = "无";
                    }

                    string dtlHtml = string.Empty;
                    string dtlUrl = "https://www.szjsjy.com.cn:8001/jyw/jyw/dbResult_View.do?bdGuid=" + ggGuid;
                    try
                    {
                        dtlHtml = this.ToolWebSite.GetHtmlByUrl(dtlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                        if (string.IsNullOrEmpty(dtlHtml) || dtlHtml.Length < 10)
                        {
                            dtlUrl = "https://www.szjsjy.com.cn:8001/jyw/queryPmxtTbrListGs.do?dbGuid=" + ggGuid;
                            dtlHtml = this.ToolWebSite.GetHtmlByUrl(dtlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                        }
                    }
                    catch
                    {
                        Logger.Error(prjName);
                        continue;
                    }

                    if (!string.IsNullOrEmpty(dtlHtml) && dtlHtml.Length > 10)
                    {
                        HtmlTxt = dtlHtml;
                        Parser parserNew = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = parserNew.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "de_tab1")));
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            // Fill the empty template cells with the values read from JSON.
                            HtmlTxt = tableNode.AsHtml();
                            HtmlTxt = FillEmptyCell(HtmlTxt, "ggName", ggMc);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "bdBH", bdBh);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "bdName", ggMc);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "zbRName", jsDw);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "zbName", zbName);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "dbTime", dbTime);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "rwfs", rwFangShi);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "dbBanFa", dbBanFa);
                            HtmlTxt = FillEmptyCell(HtmlTxt, "lianXiRenName", Lxr);
                            // Fix: the phone cell used to be rewritten with id="lianXiRenName",
                            // producing a duplicate id; it now keeps its own id.
                            HtmlTxt = FillEmptyCell(HtmlTxt, "lianXiRenPhone", LxDh);

                            string resultUrl = "https://www.szjsjy.com.cn:8001/jyw/queryTbrListByBdGuidAndGgGuidForGs.do";
                            string jsonResult = string.Empty;
                            try
                            {
                                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "bdGuid", "ggGuid" }, new string[] { bdGuid, ggShiXiangGuid });
                                jsonResult = this.ToolWebSite.GetHtmlByUrl(resultUrl, nvc).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                                if (string.IsNullOrEmpty(jsonResult) || jsonResult.Length <= 10)
                                {
                                    // Retry with the listing's ggGuid instead of ggShiXiangGuid.
                                    nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "bdGuid", "ggGuid" }, new string[] { bdGuid, ggGuid });
                                    jsonResult = this.ToolWebSite.GetHtmlByUrl(resultUrl, nvc).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                                }
                            }
                            catch (Exception)
                            {
                                Logger.Error(prjName);
                            }

                            if (!string.IsNullOrEmpty(jsonResult) && jsonResult.Length >= 10)
                            {
                                // Restore the original project number inside the payload
                                // wherever its ASCII-bracket form appears.
                                string asciiProjectNo = string.Empty, rawProjectNo = string.Empty;
                                try
                                {
                                    Dictionary<string, object> xm = (Dictionary<string, object>)bd["xm"];
                                    asciiProjectNo = xm["jiHua_LiXiang_BH"].ToString().GetReplace("【", "[").GetReplace("】", "]");
                                    rawProjectNo = xm["jiHua_LiXiang_BH"].ToString();
                                }
                                catch
                                {
                                }

                                string tempJson = jsonResult;
                                if (!string.IsNullOrEmpty(asciiProjectNo))
                                {
                                    tempJson = jsonResult.Replace(asciiProjectNo, rawProjectNo);
                                }

                                string dtlTbName = PrjResultStr(tempJson, true);
                                JavaScriptSerializer serializerDtl = new JavaScriptSerializer();
                                object[] dtlObj = null;
                                try
                                {
                                    dtlObj = (object[])serializerDtl.DeserializeObject(dtlTbName);
                                }
                                catch
                                {
                                    try
                                    {
                                        // Repair attempt 1: replace the last two characters with "}}]".
                                        dtlTbName = dtlTbName.Substring(0, dtlTbName.Length - 2);
                                        dtlTbName += "}}]";
                                        dtlObj = (object[])serializerDtl.DeserializeObject(dtlTbName);
                                    }
                                    catch
                                    {
                                        try
                                        {
                                            // Repair attempt 2: close each array element one level deeper.
                                            dtlTbName = dtlTbName.Trim().Replace("},{", "}},{");
                                            dtlObj = (object[])serializerDtl.DeserializeObject(dtlTbName);
                                        }
                                        catch
                                        {
                                            Logger.Error(prjName);
                                        }
                                    }
                                }

                                // Render the bidder table that matches the evaluation method.
                                bool isOk = false;
                                StringBuilder sb = new StringBuilder();
                                if (dbBanFa.Contains("逐轮票决"))
                                {
                                    // One table per voting round.
                                    List<PrjResult> prjResluts = LPrjResult.GetPrjZlResult(dtlObj);
                                    IEnumerable<IGrouping<int, PrjResult>> rounds = prjResluts.GroupBy(x => x.lunCiXuHao).OrderBy(x => x.Key);
                                    foreach (IGrouping<int, PrjResult> round in rounds)
                                    {
                                        sb.AppendFormat("<h3>第{0}大轮投票表</h3>", round.Key);
                                        sb.Append("<table width='100%' border='0' class='de_tab2'>");
                                        AppendHeaderRow(sb, "编号", "投标单位", "得票数", "排名");
                                        foreach (PrjResult prj in round.ToList().OrderBy(x => x.Bh).ToList())
                                        {
                                            AppendDataRow(sb, prj.Bh, prj.Name, prj.Mc, prj.Xh);
                                            isOk = true;
                                        }
                                        sb.Append("</table>");
                                    }
                                }
                                else
                                {
                                    sb.Append("<table width='100%' border='0' class='de_tab2'>");
                                    if (dbBanFa == "直接票决")
                                    {
                                        AppendHeaderRow(sb, "编号", "投标单位", "取胜次数", "排名");
                                        foreach (PrjResult prj in LPrjResult.GetPrjResult(dtlObj))
                                        {
                                            AppendDataRow(sb, prj.Bh, prj.Name, prj.Mc, prj.Xh);
                                            isOk = true;
                                        }
                                    }
                                    else
                                    {
                                        AppendHeaderRow(sb, "序号", "企业名称", "投标时间", "中标候选人");
                                        foreach (PrjResult prj in LPrjResult.GetPrjResultBid(dtlObj))
                                        {
                                            AppendDataRow(sb, prj.Xh, prj.Name, prj.Date, prj.IsBid);
                                            isOk = true;
                                        }
                                    }
                                    sb.Append("</table>");
                                }

                                if (isOk)
                                {
                                    HtmlTxt += sb.ToString();
                                }
                            }
                        }
                    }
                }
                catch
                {
                    // Best effort: whatever was rebuilt before the failure is kept.
                }

                if (!string.IsNullOrEmpty(attachFileGroupGuid))
                {
                    // Append a table listing the entry's files and remember them for download.
                    bool FileOk = false;
                    StringBuilder sb = new StringBuilder();
                    try
                    {
                        sb.Append("<table id=\"wenJian_List\" width=\"100%\" border=\"0\" class=\"de_tab2\">");
                        sb.Append("<tr>");
                        sb.Append("<td class=\"bg_tdtop\">序号</td>");
                        sb.Append("<td class=\"bg_tdtop\" >文件名</td>");
                        sb.Append("<td class=\"bg_tdtop\">创建时间</td>");
                        sb.Append("</tr>");

                        string url = "https://www.szjsjy.com.cn:8001/jyw/filegroup/queryByGroupGuidZS.do?groupGuid=" + attachFileGroupGuid;
                        string attachHtml = this.ToolWebSite.GetHtmlByUrl(url);
                        JavaScriptSerializer newSerializer = new JavaScriptSerializer();
                        Dictionary<string, object> newTypeJson = (Dictionary<string, object>)newSerializer.DeserializeObject(attachHtml);
                        foreach (KeyValuePair<string, object> newObj in newTypeJson)
                        {
                            object[] newArray = (object[])newObj.Value;
                            int row = 1;
                            foreach (object newArr in newArray)
                            {
                                Dictionary<string, object> newDic = (Dictionary<string, object>)newArr;
                                try
                                {
                                    // Queued first, so the file is still downloaded when rendering its row fails.
                                    dicFile.Add(newDic);
                                    string attachGuid = Convert.ToString(newDic["attachGuid"]);
                                    string attachName = Convert.ToString(newDic["attachName"]);
                                    string createTime = Convert.ToString(newDic["createTime"]);
                                    if (!string.IsNullOrEmpty(createTime))
                                    {
                                        createTime = ToolHtml.GetDateTimeByLong(long.Parse(createTime)).ToString();
                                    }
                                    string newUrl = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachGuid;
                                    string aTag = "<a href='" + newUrl + "' target='_blank'>" + attachName + "</a>";
                                    sb.Append("<tr>");
                                    sb.Append("<td>" + row + "</td>");
                                    sb.Append("<td>" + aTag + "</td>");
                                    sb.Append("<td>" + createTime + "</td>");
                                    sb.Append("</tr>");
                                    row++;
                                    FileOk = true;
                                }
                                catch
                                {
                                }
                            }
                        }
                        sb.Append("</table>");
                    }
                    catch
                    {
                    }

                    if (FileOk)
                    {
                        HtmlTxt += sb.ToString();
                    }
                }
            }
            else
            {
                // The old-data endpoint returned HTML: use it as is and queue its file links.
                HtmlTxt = jsonHtml;
                Parser parserA = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parserA.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int i = 0; i < aNode.Count; i++)
                    {
                        ATag aTag = aNode[i] as ATag;
                        if (aTag.IsAtagAttach())
                        {
                            Dictionary<string, object> fileDic = new Dictionary<string, object>();
                            fileDic.Add("attachGuid", aTag.Link.GetReplace("\\"));
                            fileDic.Add("attachName", aTag.LinkText.ToNodeString());
                            dicFile.Add(fileDic);
                        }
                    }
                }
            }

            ProjectCtx = HtmlTxt.GetReplace("<br />,<br/>,</ br>,</br>", "\r\n").ToCtxString() + "\r\n";

            // Flatten the first table into "label:value" lines (odd cells are labels, even
            // cells are values) so the individual fields can be picked out by label.
            Parser parser = new Parser(new Lexer(HtmlTxt.GetReplace("th", "td")));
            NodeList ctxNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (ctxNode != null && ctxNode.Count > 0)
            {
                StringBuilder flattened = new StringBuilder();
                TableTag ctxTable = ctxNode[0] as TableTag;
                for (int d = 0; d < ctxTable.RowCount; d++)
                {
                    for (int k = 0; k < ctxTable.Rows[d].ColumnCount; k++)
                    {
                        flattened.Append(ctxTable.Rows[d].Columns[k].ToNodePlainString());
                        flattened.Append((k + 1) % 2 == 0 ? "\r\n" : ":");
                    }
                }

                string dtlCtx = flattened.ToString();
                BuildUnit = dtlCtx.GetRegex("建设单位");
                FinalistsWay = dtlCtx.GetRegex("入围方式");
                RevStaMethod = dtlCtx.GetRegex("评标方法");
                SetStaMethod = dtlCtx.GetRegex("定标方法");
                VoteMethod = dtlCtx.GetRegex("票决方法");
                RevStaDate = dtlCtx.GetRegex("定标时间").GetDateRegex("yyyy/MM/dd");
                if (!SetStaMethod.IsChina())
                {
                    SetStaMethod = "";
                }
            }

            string MsgType = "深圳市建设工程交易中心";
            sqlCount++;
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return list;
            }

            ProjectResult info = ToolDb.GetProjectResult("广东省", "深圳市工程", "", Code, prjName, BuildUnit, FinalistsWay, RevStaMethod, SetStaMethod, VoteMethod, RevStaDate, InfoUrl, MsgType, ProjectCtx, HtmlTxt, beginDate);

            // NOTE(review): hard-coded one-off that forces a re-import of a single project; confirm it is still needed.
            if (prjName.Contains("深圳广电金融中心施工总承包工程"))
            {
                string delSql = string.Format("delete from ProjectResult where InfoUrl='{0}'", info.InfoUrl);
                ToolDb.ExecuteSql(delSql);
            }

            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                if (this.ExistsUpdate)
                {
                    // On update, clear the children of the stored row before re-adding attachments.
                    object id = ToolDb.ExecuteScalar(string.Format("select Id from ProjectResult where InfoUrl='{0}'", info.InfoUrl));
                    if (id != null)
                    {
                        string sql = string.Format("delete from ProjectResultDtl where SourceId='{0}'", id);
                        ToolDb.ExecuteSql(sql);
                        string sqlAttach = string.Format("delete from BaseAttach where SourceId='{0}'", id);
                        ToolDb.ExecuteSql(sqlAttach);
                    }
                }

                foreach (Dictionary<string, object> newDic in dicFile)
                {
                    try
                    {
                        string attachGuid = Convert.ToString(newDic["attachGuid"]);
                        string attachName = Convert.ToString(newDic["attachName"]);
                        // Entries taken from HTML links already carry a full URL.
                        string newUrl = attachGuid.ToLower().Contains("http")
                            ? attachGuid
                            : "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachGuid;

                        BaseAttach attach = ToolHtml.GetBaseAttach(newUrl, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                        if (attach == null)
                        {
                            // One retry when the first download yields nothing.
                            attach = ToolHtml.GetBaseAttach(newUrl, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                        }
                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, string.Empty);
                        }
                    }
                    catch
                    {
                        continue;
                    }
                }

                // Per-bidder ProjectResultDtl rows are not written here (that code had been
                // commented out); stale rows are still cleared above when updating.
            }
        }
    }
    return list;
}

/// <summary>
/// Returns the text of <paramref name="source"/>[<paramref name="key"/>], or <c>null</c> when the
/// dictionary is null, the key is absent, or the stored value is null.
/// </summary>
private static string ReadJsonText(Dictionary<string, object> source, string key)
{
    object value;
    if (source == null || !source.TryGetValue(key, out value) || value == null)
    {
        return null;
    }
    return value.ToString();
}

/// <summary>Replaces the empty template cell with the given id by the same cell holding <paramref name="value"/>.</summary>
private static string FillEmptyCell(string html, string cellId, string value)
{
    return html.GetReplace("<td id=" + cellId + "> </td>", "<td id=\"" + cellId + "\"> " + value + "</td>");
}

/// <summary>Appends one header row with a left-aligned cell per title.</summary>
private static void AppendHeaderRow(StringBuilder target, params string[] titles)
{
    target.Append("<tr>");
    foreach (string title in titles)
    {
        target.Append("<th style='text-align: left' class='bg_tdtop'>" + title + "</th>");
    }
    target.Append("</tr>");
}

/// <summary>Appends one data row; the first cell is plain, the remaining cells carry the bg_tdtop class.</summary>
private static void AppendDataRow(StringBuilder target, object first, params object[] rest)
{
    target.Append("<tr>");
    target.Append("<th style='padding: 0px'>" + first + "</th>");
    foreach (object cell in rest)
    {
        target.Append("<th style='padding: 0px' class='bg_tdtop'>" + cell + "</th>");
    }
    target.Append("</tr>");
}
/// <summary>
/// Crawls the paginated notice list at <c>SiteUrl</c>, opens every listed detail page,
/// stores each notice as a <c>NotifyInfo</c> and downloads the attachments linked from
/// the notice body.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops once <c>MaxCount</c> notices have been processed;
/// when <c>true</c>, every page is visited.
/// </param>
/// <returns>Always <c>null</c>; results are persisted through <c>ToolDb</c> rather than returned.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    // Get the page count; it stays at 1 when the pager cannot be found or parsed.
    int pageInt = 1, sqlCount = 0;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch (Exception)
    {
        // Without the first list page there is nothing to crawl.
        return null;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination page-mar")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string pageText = pageNode.AsString().GetRegexBegEnd("/共", "页");
            pageInt = int.Parse(pageText);
        }
        catch
        {
            // Best effort: an unreadable pager means only the first page is crawled.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
            }
            catch
            {
                // A page that fails to download is skipped; later pages are still tried.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "wsbs-table")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is skipped (iteration starts at 1), as in the original loop.
        for (int j = 1; j < table.RowCount; j++)
        {
            string infoScorce = string.Empty;
            TableRow tr = table.Rows[j];
            string headName = tr.Columns[1].ToNodePlainString();
            string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
            string infoUrl = "http://www.gzggzy.cn" + tr.Columns[1].GetATagHref();

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default);
            }
            catch
            {
                // Detail page unavailable: skip this notice.
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "xx-main")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string ctxHtml = dtlNode.AsHtml();
            string infoCtx = ctxHtml.ToCtxString();
            string msgType = "广州公共资源交易中心";
            string infoType = "通知公告";
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "广州市区", string.Empty, infoCtx, infoType);
            sqlCount++;

            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                SaveNoticeAttachments(ctxHtml, info.Id);
            }

            // The limit applies to incremental runs only (crawlAll == false), matching the
            // other crawlers in this file. The previous condition was inverted: it capped
            // full crawls and left incremental runs unbounded.
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
        }
    }

    return null;
}

/// <summary>
/// Downloads and stores every attachment link found in a saved notice body.
/// </summary>
/// <param name="ctxHtml">HTML of the notice body to scan for anchor tags.</param>
/// <param name="sourceId">Id of the saved notice the attachments belong to.</param>
private void SaveNoticeAttachments(string ctxHtml, string sourceId)
{
    Parser parser = new Parser(new Lexer(ctxHtml));
    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
    if (aNode == null || aNode.Count == 0)
    {
        return;
    }

    for (int k = 0; k < aNode.Count; k++)
    {
        ATag aTag = aNode[k].GetATag();
        if (!aTag.IsAtagAttach())
        {
            continue;
        }

        // Links that do not contain "http" are treated as site-relative.
        string link = aTag.Link.ToLower().Contains("http")
            ? aTag.Link
            : "http://www.gzggzy.cn" + aTag.Link;

        try
        {
            BaseAttach entity = ToolHtml.GetBaseAttach(link, aTag.LinkText, sourceId);
            if (entity != null)
            {
                ToolDb.SaveEntity(entity, string.Empty);
            }
        }
        catch
        {
            // Best effort: one failed attachment must not abort the notice or the crawl.
        }
    }
}
/// <summary>
/// Crawls the paginated service-guide table at <c>SiteUrl</c>, stores each entry as a
/// <c>NotifyInfo</c> and downloads the files linked from the entry's rows.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops once <c>MaxCount</c> entries have been counted;
/// when <c>true</c>, every page is visited.
/// </param>
/// <returns>Always <c>null</c>; results are persisted through <c>ToolDb</c> rather than returned.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    int pageInt = 1, sqlCount = 0;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
    }
    catch (Exception)
    {
        // Without the first list page there is nothing to crawl.
        return null;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "700")));
    if (pageList != null && pageList.Count > 0)
    {
        try
        {
            string pageText = pageList.AsString().GetRegexBegEnd("/", "下");
            pageInt = Convert.ToInt32(pageText);
        }
        catch
        {
            // Unreadable pager: fall back to a single page.
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                // NOTE(review): the first page is passed through GetJsString() but later
                // pages are not — confirm whether that difference is intentional.
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?page=" + i.ToString(), Encoding.Default);
            }
            catch
            {
                // A page that fails to download is skipped; later pages are still tried.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "98%")));
        if (nodeList == null || nodeList.Count <= 1)
        {
            continue;
        }

        // The second matching table is the one that is read.
        TableTag table = nodeList[1] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 2)
            {
                // Column 1 carries the title and link; a row without it cannot be an entry.
                continue;
            }

            string infoScorce = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty;
            string rowSpanText = tr.Columns[1].GetAttribute("rowSpan");
            string infoType = "办事指南";
            string releaseTime = DateTime.Now.ToString("yyyy-MM-dd");
            string headName = tr.Columns[1].ToNodePlainString();
            string infoUrl = "http://www.stjs.gov.cn/bsdt/" + tr.Columns[1].GetATagHref();
            string msgType = MsgTypeCosnt.ShanTouMsgType;

            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "汕头市区", string.Empty, infoCtx, infoType);
            sqlCount++;
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                continue;
            }

            // The entry's own link may itself point at a downloadable file.
            if (infoUrl.IsAtagAttach())
            {
                SaveGuideAttachment(infoUrl, headName, info.Id);
            }

            // rowSpan on column 1 gives the number of rows belonging to this entry; each of
            // those rows may carry a file link in its last column. A missing, non-numeric or
            // non-positive value means the entry occupies only the current row.
            int attachCount;
            if (string.IsNullOrEmpty(rowSpanText) || !int.TryParse(rowSpanText, out attachCount) || attachCount <= 0)
            {
                continue;
            }

            // Clamp the group to the table so an oversized rowSpan cannot index past the end.
            int groupEnd = j + attachCount;
            if (groupEnd > table.RowCount)
            {
                groupEnd = table.RowCount;
            }

            for (int r = j; r < groupEnd; r++)
            {
                TableRow dr = table.Rows[r];
                if (dr.ColumnCount == 0)
                {
                    continue;
                }

                ATag fileUrl = dr.Columns[dr.ColumnCount - 1].GetATag();
                if (fileUrl != null && fileUrl.IsAtagAttach())
                {
                    SaveGuideAttachment("http://www.stjs.gov.cn/bsdt/" + fileUrl.Link, fileUrl.LinkText, info.Id);
                }
            }

            // Resume the outer loop on the row after the group (its j++ adds the final step).
            j = groupEnd - 1;
        }
    }

    return null;
}

/// <summary>
/// Downloads one file and stores it as an attachment of the given entry.
/// </summary>
/// <param name="fileLink">Absolute URL of the file.</param>
/// <param name="fileName">Display name recorded for the attachment.</param>
/// <param name="sourceId">Id of the saved entry the attachment belongs to.</param>
private void SaveGuideAttachment(string fileLink, string fileName, string sourceId)
{
    try
    {
        BaseAttach obj = ToolHtml.GetBaseAttach(fileLink, fileName, sourceId);
        if (obj != null)
        {
            ToolDb.SaveEntity(obj, string.Empty);
        }
    }
    catch
    {
        // Best effort: one failed attachment must not abort the crawl.
    }
}