Пример #1
0
        /// <summary>
        /// Scans an HTML fragment for anchor tags, builds a <c>BaseAttach</c> for each link through
        /// <c>ToolHtml.GetBaseAttach</c>, and persists every attachment that could be built.
        /// </summary>
        /// <param name="info">Project supplying the fallback attachment name (<c>PrjName</c>) and, when inserting, the owner id (<c>Id</c>).</param>
        /// <param name="htmltxt">HTML to scan. Null, empty, or text that does not contain "http" results in no work.</param>
        /// <param name="result">Owner id used when <paramref name="isUpdate"/> is true; existing files and rows for this id are removed before the new ones are saved.</param>
        /// <param name="isUpdate">True to replace the attachments stored under <paramref name="result"/>; false to add attachments under <c>info.Id</c>.</param>
        private void SaveAttach(BidProject info, string htmltxt, string result, bool isUpdate)
        {
            const string attachFolder = "SiteManage\\Files\\Attach\\";

            // Without any "http" text there can be no downloadable link; also protects against a null fragment.
            if (string.IsNullOrEmpty(htmltxt) || !htmltxt.Contains("http"))
            {
                return;
            }

            Parser   parser = new Parser(new Lexer(htmltxt));
            NodeList aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode == null || aNode.Count == 0)
            {
                return;
            }

            List <BaseAttach> list = new List <BaseAttach>();
            for (int j = 0; j < aNode.Count; j++)
            {
                ATag aTag = aNode[j].GetATag();
                if (aTag == null)
                {
                    // Node could not be viewed as an anchor; nothing to download.
                    continue;
                }

                string aurl = string.Empty;
                try
                {
                    string attachName = aTag.LinkText;
                    if (string.IsNullOrWhiteSpace(attachName))
                    {
                        attachName = info.PrjName;
                    }

                    // Strip escaped quotes (\") left over from script-embedded markup before decoding.
                    aurl = aTag.Link.GetReplace("\\\"", "");
                    string url = System.Web.HttpUtility.UrlDecode(aurl);
                    if (url == null)
                    {
                        // UrlDecode returns null for a null input; there is no address to fetch.
                        continue;
                    }

                    BaseAttach entity = null;
                    if (isUpdate)
                    {
                        entity = ToolHtml.GetBaseAttach(url, attachName, result, attachFolder);
                    }
                    else
                    {
                        entity = ToolHtml.GetBaseAttach(url, attachName, info.Id, attachFolder);
                    }
                    if (entity != null)
                    {
                        list.Add(entity);
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: one broken link must not prevent the remaining attachments from being saved,
                    // but the failure is reported instead of being silently discarded.
                    System.Diagnostics.Trace.TraceWarning("SaveAttach: skipped attachment '{0}': {1}", aurl, ex.Message);
                }
            }

            if (list.Count == 0)
            {
                // Keep whatever is already stored when nothing new could be collected.
                return;
            }

            if (isUpdate)
            {
                // The id is embedded in the SQL text, so single quotes are doubled to keep the literal well-formed.
                string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", (result ?? string.Empty).Replace("'", "''"));
                ToolFile.Delete(result);
                ToolDb.ExecuteSql(delSql);
            }
            foreach (BaseAttach attach in list)
            {
                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
            }
        }
Пример #2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, looks for the table with id
        /// <c>ctl00_ContentPlaceHolder1_GridView1</c>, and stores the attachment links found in it
        /// as <c>BaseAttach</c> rows owned by <paramref name="sourceId"/>.
        /// </summary>
        /// <param name="url">Address of the page that lists the attachments.</param>
        /// <param name="sourceId">Owner id; existing files and rows for this id are replaced when at least one attachment is collected.</param>
        protected void SaveAttach(string url, string sourceId)
        {
            string htmlAnnex = string.Empty;
            try
            {
                htmlAnnex = this.ToolWebSite.GetHtmlByUrl(url, Encoding.UTF8);
            }
            catch (System.Exception ex)
            {
                // Best effort: an unreachable page simply yields no attachments.
                System.Diagnostics.Trace.TraceWarning("SaveAttach: could not download '{0}': {1}", url, ex.Message);
            }
            if (string.IsNullOrEmpty(htmlAnnex))
            {
                return;
            }

            Parser   dtparser = new Parser(new Lexer(htmlAnnex));
            NodeList dtList   = dtparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_GridView1"), new TagNameFilter("table")));
            if (dtList == null || dtList.Count == 0)
            {
                return;
            }

            TableTag dttable = dtList[0] as TableTag;
            if (dttable == null)
            {
                return;
            }

            // Collect the anchors once; the table is not modified while iterating.
            NodeList links = dttable.SearchFor(typeof(ATag), true);
            if (links == null)
            {
                return;
            }

            List <BaseAttach> attach = new List <BaseAttach>();

            // Row 0 is skipped (the loop has always started at the second row), so at most RowCount - 1
            // links are taken; the extra bound on links.Count avoids indexing past the end when some rows have no link.
            int dataRows = dttable.RowCount - 1;
            for (int i = 0; i < dataRows && i < links.Count; i++)
            {
                ATag file = links[i] as ATag;
                if (file == null || !file.IsAtagAttach())
                {
                    continue;
                }

                string aurl = string.Empty;
                try
                {
                    // Links on the page are relative; rebase them on the site root.
                    aurl = "http://www.szjsjy.com.cn/" + file.Link.Replace("../", "").Replace("./", "");
                    BaseAttach entity = ToolHtml.GetBaseAttach(aurl, file.LinkText, sourceId, "SiteManage\\Files\\Attach\\");
                    if (entity != null)
                    {
                        attach.Add(entity);
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: skip this attachment and continue with the next one.
                    System.Diagnostics.Trace.TraceWarning("SaveAttach: skipped attachment '{0}': {1}", aurl, ex.Message);
                }
            }

            if (attach.Count > 0)
            {
                // The id is embedded in the SQL text, so single quotes are doubled to keep the literal well-formed.
                string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", (sourceId ?? string.Empty).Replace("'", "''"));
                ToolFile.Delete(sourceId);
                ToolDb.ExecuteSql(delSql);
                ToolDb.SaveDatas(attach, string.Empty);
            }
        }
Пример #3
0
        /// <summary>
        /// Stores one hard-coded notice: the title, release date, source and HTML body are literals in this
        /// method, while the plain-text content is extracted from the page downloaded from <c>SiteUrl</c>.
        /// When the notice is saved, the hard-coded PDF attachment is fetched and saved as well.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>Always <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

            infoType = "通知公告";
            infoUrl  = this.SiteUrl;
            string htldtl = string.Empty;

            // Download failures are swallowed; htldtl then stays empty and no text content is extracted below.
            try
            {
                htldtl = ToolHtml.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
            }
            catch { }
            // Title and HTML body of the notice are fixed literals, not taken from the downloaded page.
            headName = "关于转发深圳市住房和建设局转发《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》的通知";
            ctxHtml  = "<table width='960' background='{root_path}images/xil_jl_05.jpg' border='0' cellspacing='0' cellpadding='0'>  <tbody><tr> <td align='center' background='../../../images/xil_jl_03.jpg' valign='top' style='background-repeat: repeat-x;'><table width='100%' border='0' cellspacing='0' cellpadding='0'>  <tbody><tr> <td width='9%'>&nbsp;</td> <td width='83%' height='25'>&nbsp;</td>  <td width='8%'>&nbsp;</td> </tr> <tr>    <td>&nbsp;</td>   <td valign='top'><table width='100%' border='0' cellspacing='0' cellpadding='0'>      <tbody><tr>  <td width='8%' height='25' class='red12a'>题材分类:</td>  <td width='42%'><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('catalog1=327')'>通知公告公示</a></td> <td width='8%' class='red12a'>主题分类:</td> <td width='42%'><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('catalog2=479')'>其他</a></td>      </tr>    <tr>     <td height='25' class='red12a'>发文机构:</td>    <td><span id='fbjgid' style='display: none;'><script>fbjg('深圳市南山区人民政府 ')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('district=深圳市南山区人民政府')'>深圳市南山区人民政府</a></span></td><script>var wh = ''; wh = wh.replace(/ /ig,''); wh = wh.replace(/ /ig,''); 	if(wh==''||wh==null||'无'==wh){ 	document.getElementById('fbjgid').style.display='none';	}</script>     <td class='red12a'>来源网站发布日期:</td>   <td><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('urltime=2013.08.12')'>2013-08-12</a></td>   </tr> <tr> <td height='25' class='red12a'>所属地区:</td>    <td><script>ssdq('广东省深圳市 ')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('vreserved3=广东省深圳市')'>广东省深圳市</a>;</td>      <td class='red12a'>文&nbsp;&nbsp;&nbsp;&nbsp;号:</td>      <td><script type='text/javascript'> 	ycwh(); </script></td>       </tr>   <tr>     <td height='25' class='red12a' valign='top' style='padding-top: 8px;'>关 键 词:</td> <td valign='top' style='line-height: 20px; padding-top: 
3px;'><script>gjzsj('深圳市;泥头车;货运交通;交通运输;备案;港航;土石方运输;投标资质;考评;异地')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=深圳市')'>深圳市</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=泥头车')'>泥头车</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=货运交通')'>货运交通</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=交通运输')'>交通运输</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=备案')'>备案</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=港航')'>港航</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=土石方运输')'>土石方运输</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=投标资质')'>投标资质</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=考评')'>考评</a>;<a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('keywords=异地')'>异地</a>;</td>        <td class='red12a'>公文发布日期:</td>       <td><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('urldate=')'></a></td>          </tr>   </tbody></table></td>   <td>&nbsp;</td> </tr>   </tbody></table></td>  </tr>   <tr>    <td bgcolor='#ffffff'><img width='943' height='8' src='../../../images/xil_jl_06.jpg'></td> </tr> </tbody></table>    <table width='960' bgcolor='#ffffff' border='0' cellspacing='0' cellpadding='0'>      <tbody><tr> <td align='center' valign='top'><table width='830' border='0' cellspacing='0' cellpadding='0'>  <tbody><tr>        <td align='center' class='dbiaoti' style='padding: 15px 0px;'>关于转发深圳市住房和建设局转发《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》的通知</td>    </tr>       </tbody></table>   <table width='830' border='0' cellspacing='0' cellpadding='0'>    <tbody><tr>       <td><table width='100%' background='../../../images/erj_jl_122_28.jpg' border='0' cellspacing='0' cellpadding='0'>        
<tbody><tr>    <td width='12'><img width='12' height='34' src='../../../images/erj_jl_121_25.jpg'></td>     <td><table width='100%' height='25' align='center' border='0' cellspacing='0' cellpadding='0'>   <tbody><tr>   <td class='fff12'>来源:<script>lyjs('深圳市南山区人民政府')</script><a style='text-decoration: underline; cursor: pointer;' onclick='xlsj('sitename=深圳市南山区人民政府')'>深圳市南山区人民政府</a>;</td>    <td width='80'><a onclick='checkUrl('http://www.szns.gov.cn/publish/main/1/19/tzgg/20130812110509651949516/index.html','关于转发深圳市住房和建设局转发《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》的通知','4032393');' href='#'>原文链接 &gt;&gt;</a></td>   <td width='80'><a href='/search/htmlflash4Radar?docid=4032393'>网页快照</a> &gt;&gt; </td>   </tr>            </tbody></table></td>     <td width='8'><img width='8' height='34' src='../../../images/erj_jl_123_30.jpg'></td>     </tr>       </tbody></table></td>    </tr>      </tbody></table>    <table width='830' border='0' cellspacing='0' cellpadding='0'>  <tbody><tr>   <td class='zw_link' valign='top' style='padding: 20px 0px 0px;'>  		  <br><br>各有关单位:<br>  现将《深圳市交通运输委港航和货运交通管理局关于我市泥头车运输企业土石方运输业务投标资质考评和异地泥头车备案托管第二阶段情况的通报》(深交港货[2013]164号)转发给你们,请遵照执行。目前,共有46家泥头车运输企业已获取我市土石方运输业务投标资质;共有82家异地企业204辆泥头车,分别与12家土石方运输业务投标资质企业达成了备案托管。<br>  特此通知。<br>  联系人:李衍航,电话:83788608。 <br>  附件:深交港货[2013]164号<br>  深圳市住房和建设局<br>    2013年8月9日<br>&nbsp;<br><br><br><br>          <script type='text/javascript'> 		qufj();   </script><a href='./P020131018007991034107.pdf'> 附件:深交港货[2013]164号 </a><br>   </td>  </tr>  </tbody></table>     <table width='100%' border='0' cellspacing='0' cellpadding='0'>     <tbody><tr>    <td>&nbsp;</td>   </tr>   </tbody></table></td>    </tr>  </tbody></table>";
            //infoCtx = ctxHtml.GetJsString().Replace("<tr>", "").Replace("</tr>", "").Replace("<br>", "\r\n").ToCtxString().Replace("&gt;", "");
            // The text content comes from the downloaded page: the <td> cells whose background attribute is the sd_in_09.jpg image.
            Parser   parser   = new Parser(new Lexer(htldtl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("background", "../../../images/sd_in_09.jpg")));

            if (nodeList != null && nodeList.Count > 0)
            {
                infoCtx = nodeList.AsHtml().Replace("<br>", "\r\n").ToCtxString().Replace(":\r\n", ":").Replace("&gt;", "");
            }
            // Source, message type and release date are fixed values for this single notice.
            msgType     = infoScorce = "深圳市住房和建设局";
            releaseTime = "2013-08-09";
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "深圳市工程", string.Empty, infoCtx, infoType);

            // The attachment is only fetched when SaveEntity reports success for the notice.
            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
            {
                BaseAttach attach = ToolHtml.GetBaseAttach("http://govinfo.nlc.gov.cn/gdsszfz/xxgk/szsnsqrmzf/201310/P020131018007991034107.pdf", "深交港货[2013]164号", info.Id);
                if (attach != null)
                {
                    ToolDb.SaveEntity(attach, string.Empty);
                }
            }
            return(null);
        }
Пример #4
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            this.HttpApi();
            //SetTemp();
            DateTime time = ToolHtml.GetDateTimeByLong(1543593600000);

            string html = string.Empty;
            //string url = "http://web.zjj.sz.gov.cn/HouseOutService/queryFwRentms/getGrQyInfo.json";
            //string url = "http://web.zjj.sz.gov.cn/HouseOutService/queryFwRentms/getApplyers.json";
            string url = "http://web.zjj.sz.gov.cn/HouseOutService/queryFwRentms/getUnitQyInfos.json";
            //string url = "http://web.zjj.sz.gov.cn/HouseOutService/queryFwRentms/getUnitFlatInfos.json";
            //string url = "http://web.zjj.sz.gov.cn/zfxx_jscjn/external/project/info/get?pageIndex=1&pageSize=100";
            string cookies   = string.Empty;
            string publicKey = "bnNfZGF0YTpLSUlmMndLVWJ1RmVyVEhRZWh5WTFyNzNlVEM4VmVTb3p2eFBDanN2VVJRWnExR20xdVduVk1FQnlyK0ZrMEdhcVRGRzFVUUw1dTBDNEpxRWNRSVRra3NOYWgxcFVldnJCbnpTcDJaUnU3THpyNTZsUmhzd09NdHNiZHYxVCtJbGdHdzBEcUZXczJIVVYzZkw0NWFnbldqemt3MHJpVlJ2cEs5MFFiOHBMb1E9";

            string key  = string.Format("{0}{1}{2}{3}", "e02d02ec17a14446a861bbad068c40ef", "440305", "", "1990-01-01");
            string keys = DESEncrypt.GenerateMD5(key);

            //"983f9b3b897c77f27c9bd27837d82f5f"
            //"983f9b3b897c77f27c9bd21837d02f5f"
            keys = keys.Replace("o", "p");
            keys = keys.Replace("i", "t");
            keys = keys.Replace("l", "n");
            keys = keys.Replace("1", "7");
            keys = keys.Replace("0", "8");
            Dictionary <string, string> dic1 = new Dictionary <string, string>();

            dic1.Add("key", keys);
            dic1.Add("belongto", "440305");
            dic1.Add("quart", "");
            dic1.Add("page", "60");
            dic1.Add("timestamp", "1990-01-01");
            string jsonStr = JsonConvert.SerializeObject(dic1);

            string jsonData = string.Empty;

            try
            {
                using (IWebHttpClient httpClient = new WebHttpClient())
                {
                    jsonData = httpClient.PostSync(new Uri(url), jsonStr, publicKey);
                }
            }
            catch (Exception ex)
            {
            }

            Dictionary <string, object> dics = JsonConvert.DeserializeObject <Dictionary <string, object> >(jsonData);

            object oobj = dics["date"];

            Dictionary <string, object> contents = JsonConvert.DeserializeObject <Dictionary <string, object> >(oobj.ToString());

            string str1 = contents["content"].ToString();

            List <Dictionary <string, object> > jsonLists = JsonConvert.DeserializeObject <List <Dictionary <string, object> > >(str1);

            foreach (Dictionary <string, object> keys1 in jsonLists)
            {
                WebPactCorpData model = JsonConvert.DeserializeObject <WebPactCorpData>(JsonConvert.SerializeObject(keys1));
                string          str2  = keys1.ToString();
            }

            string jsonsss = oobj.ToString();

            KeyValuePair <string, object> keyValues = (KeyValuePair <string, object>)oobj;

            Dictionary <string, object> content = (Dictionary <string, object>)dics["date"];

            object[] objs = (object[])content["content"];

            foreach (object obj in objs)
            {
                WebPactCorpData corp = obj as WebPactCorpData;
            }

            WebPactCorp entity = JsonConvert.DeserializeObject <WebPactCorp>(jsonData);

            //using (IWebHttpClient httpClient = new WebHttpClient())
            //{
            //    jsonData = httpClient.GetSync(new Uri(url), publicKey);
            //}

            //html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8, true, publicKey, ref cookies);

            IList list     = new List <BidInfo>();
            int   sqlCount = 0;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (this.MaxCount + 20));
            }
            catch { return(null); }

            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }
                object[] array = (object[])obj.Value;
                foreach (object arrValue in array)
                {
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, bidDate = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, remark = string.Empty, prjMgr = string.Empty, HtmlTxt = string.Empty;

                    Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                    code      = Convert.ToString(dic["bdBH"]);
                    prjName   = Convert.ToString(dic["bdName"]);
                    bidType   = Convert.ToString(dic["gcLeiXing2"]);
                    beginDate = Convert.ToString(dic["fabuTime2"]);
                    try
                    {
                        bidMoney = Convert.ToString(dic["zhongBiaoJE"]).GetMoney();
                    }
                    catch
                    {
                    }
                    string addUrl = Convert.ToString(dic["detailUrl"]);
                    //https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=4&id=158df5f1-73a1-440c-a59b-e4ca1464b4e9
                    InfoUrl = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=4&id=" + dic["dbZhongBiaoJieGuoGuid"];

                    try
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    }
                    catch { }

                    List <Dictionary <string, string> > dicFile = new List <Dictionary <string, string> >();
                    if (string.IsNullOrEmpty(HtmlTxt))
                    {
                        string strHtml = string.Empty;
                        string newUrl  = "https://www.szjsjy.com.cn:8001/jyw/queryZbgs.do?guid=" + dic["dbZhongBiaoJieGuoGuid"] + "&ggGuid=bdGuid=";
                        InfoUrl = Convert.ToString(dic["detailUrl"]);
                        try
                        {
                            HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                            strHtml = this.ToolWebSite.GetHtmlByUrl(newUrl).GetJsString();
                        }
                        catch { }

                        if (!string.IsNullOrEmpty(strHtml))
                        {
                            string gcBH = string.Empty, gcName = string.Empty, xmBH = string.Empty, xmName = string.Empty, zbgsStartTime = string.Empty, zbgsEndTime = string.Empty, zbRName = string.Empty, zbdlJG = string.Empty, zbFangShi = string.Empty, bdName = string.Empty, tbrName = string.Empty, zhongBiaoJE = string.Empty, zhongBiaoGQ = string.Empty, xiangMuJiLi = string.Empty, ziGeDengJi = string.Empty, ziGeZhengShu = string.Empty, isZanDingJinE = string.Empty, gcLeiXing = string.Empty, isPLZB = string.Empty, ztbFileGroupGuid = string.Empty;
                            try
                            {
                                Dictionary <string, string> zbfs = new Dictionary <string, string>();
                                zbfs.Add("2", "邀请招标");
                                zbfs.Add("1", "公开招标");
                                zbfs.Add("YuXuanZhaoBiaoZGC", "预选招标子工程");
                                zbfs.Add("GongKaiZhaoBiao", "公开招标");
                                zbfs.Add("5", "预选招标子工程");
                                zbfs.Add("4", "单一来源");
                                zbfs.Add("DanYiLaiYuan", "单一来源");
                                zbfs.Add("YaoQingZhaoBiao", "邀请招标");
                                JavaScriptSerializer        newSerializer = new JavaScriptSerializer();
                                Dictionary <string, object> newTypeJson   = (Dictionary <string, object>)newSerializer.DeserializeObject(strHtml);
                                Dictionary <string, object> bd            = newTypeJson["bd"] as Dictionary <string, object>;
                                Dictionary <string, object> gc            = bd["gc"] as Dictionary <string, object>;
                                ztbFileGroupGuid = Convert.ToString(newTypeJson["ztbFileGroupGuid"]);
                                gcBH             = Convert.ToString(gc["gcBH"]);
                                gcName           = Convert.ToString(gc["gcName"]);
                                Dictionary <string, object> xm = bd["xm"] as Dictionary <string, object>;

                                if (xm != null)
                                {
                                    xmBH   = Convert.ToString(xm["xm_BH"]);
                                    xmName = Convert.ToString(xm["xm_Name"]);
                                }
                                object startTime = newTypeJson["zbgsStartTime"];
                                if (startTime != null)
                                {
                                    zbgsStartTime = ToolHtml.GetDateTimeByLong(Convert.ToInt64(startTime)).ToString("yyyy-MM-dd HH:mm");
                                }

                                object endTime = newTypeJson["zbgsEndTime"];
                                if (endTime != null)
                                {
                                    endDate = zbgsEndTime = ToolHtml.GetDateTimeByLong(Convert.ToInt64(endTime)).ToString("yyyy-MM-dd HH:mm");
                                }

                                buildUnit = zbRName = Convert.ToString(gc["zbRName"]);

                                zbdlJG = Convert.ToString(newTypeJson["zbdlJG"]);

                                zbFangShi = Convert.ToString(gc["zbFangShi"]);
                                if (!string.IsNullOrEmpty(zbFangShi))
                                {
                                    zbFangShi = zbfs[zbFangShi];
                                }
                                bdName  = Convert.ToString(bd["bdName"]);
                                bidUnit = tbrName = Convert.ToString(newTypeJson["tbrName"]);

                                zhongBiaoJE = Convert.ToString(newTypeJson["zhongBiaoJE"]);
                                if (!string.IsNullOrEmpty(zhongBiaoJE))
                                {
                                    try
                                    {
                                        bidMoney = zhongBiaoJE = (decimal.Parse(zhongBiaoJE) / 1000000).ToString();
                                    }
                                    catch { }
                                }
                                else
                                {
                                    try
                                    {
                                        zhongBiaoJE = Convert.ToString(newTypeJson["tongYongZhongBiaoJia"]);
                                        bidMoney    = (zhongBiaoJE + "\r\n").GetMoneyRegex(new string[] { "人民币" });
                                    }
                                    catch { }
                                }
                                zhongBiaoGQ = Convert.ToString(newTypeJson["zhongBiaoGQ"]);
                                prjMgr      = xiangMuJiLi = Convert.ToString(newTypeJson["xiangMuJiLi"]);

                                ziGeDengJi    = Convert.ToString(newTypeJson["ziGeDengJi"]);
                                ziGeZhengShu  = Convert.ToString(newTypeJson["ziGeZhengShu"]);
                                isZanDingJinE = Convert.ToString(newTypeJson["isZanDingJinE"]);
                                gcLeiXing     = Convert.ToString(bd["gcLeiXing"]);
                                isPLZB        = Convert.ToString(gc["isPLZB"]);
                            }
                            catch (Exception ex)
                            {
                                Logger.Error(ex);
                            }

                            Parser   parser  = new Parser(new Lexer(HtmlTxt));
                            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail_contect")));
                            if (dtlNode != null && dtlNode.Count > 0)
                            {
                                HtmlTxt = dtlNode.AsHtml();
                                HtmlTxt = HtmlTxt.GetReplace("<span id=gcBH></span>", "<span id=gcBH>" + gcBH + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=gcName></span>", "<span id=gcName>" + gcName + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=xmBH></span>", "<span id=xmBH>" + xmBH + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=xmName></span>", "<span id=xmName>" + xmName + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=zbgsStartTime></span>", "<span id=zbgsStartTime>" + zbgsStartTime + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=zbgsEndTime></span>", "<span id=zbgsEndTime>" + zbgsEndTime + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=zbRName></span>", "<span id=zbRName>" + zbRName + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=zbdlJG></span>", "<span id=zbdlJG>" + zbdlJG + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=zbFangShi></span>", "<span id=zbFangShi>" + zbFangShi + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=bdName></span>", "<span id=bdName>" + bdName + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=tbrName></span>", "<span id=tbrName>" + tbrName + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=zhongBiaoJE></span>", "<span id=zhongBiaoJE>" + zhongBiaoJE + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=zhongBiaoGQ></span>", "<span id=zhongBiaoGQ>" + zhongBiaoGQ + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=xiangMuJiLi></span>", "<span id=xiangMuJiLi>" + xiangMuJiLi + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=ziGeDengJi></span>", "<span id=ziGeDengJi>" + ziGeDengJi + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=ziGeZhengShu></span>", "<span id=ziGeZhengShu>" + ziGeZhengShu + "</span>");
                                HtmlTxt = HtmlTxt.GetReplace("<span id=isZanDingJinE></span>", "<span id=isZanDingJinE>" + isZanDingJinE.ToLower() == "true" ? "是" : "否" + "</span>");
                            }
                            string fileUrl  = "https://www.szjsjy.com.cn:8001/jyw/filegroup/queryByGroupGuidZS.do?groupGuid=" + ztbFileGroupGuid;
                            string fileHtml = string.Empty;
                            try
                            {
                                fileHtml = this.ToolWebSite.GetHtmlByUrl(fileUrl);
                                JavaScriptSerializer        fileSerializer = new JavaScriptSerializer();
                                Dictionary <string, object> fileTypeJson   = (Dictionary <string, object>)fileSerializer.DeserializeObject(fileHtml);
                                foreach (KeyValuePair <string, object> fileObj in fileTypeJson)
                                {
                                    object[] fileArray = (object[])fileObj.Value;
                                    foreach (object fileValue in fileArray)
                                    {
                                        Dictionary <string, object> tempDic = (Dictionary <string, object>)fileValue;
                                        Dictionary <string, string> file    = new Dictionary <string, string>();
                                        file.Add("Name", Convert.ToString(tempDic["attachName"]));
                                        file.Add("Url", Convert.ToString("https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + tempDic["attachGuid"]));
                                        dicFile.Add(file);
                                    }
                                }
                            }
                            catch { }
                        }
                    }
                    bidCtx = HtmlTxt.GetReplace("<br />,<br/>,</ br>,</br>", "\r\n").ToCtxString() + "\r\n";
                    if (string.IsNullOrEmpty(buildUnit) && string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.GetBidRegex();
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = bidCtx.Replace(" ", "").GetBidRegex();
                        }
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = bidCtx.GetRegex("中 标 人");
                        }
                        string money = bidCtx.GetMoneyRegex();
                        if (!string.IsNullOrEmpty(money))
                        {
                            bidMoney = money;
                        }
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = bidCtx.GetMoneyRegex();
                        }
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = bidCtx.Replace(" ", "").GetMoneyRegex();
                        }
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = bidCtx.GetRegex("中 标 价");
                        }

                        prjMgr = bidCtx.GetMgrRegex();
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = bidCtx.Replace(" ", "").GetMgrRegex();
                        }
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = bidCtx.GetRegex("项 目 总 监");
                        }

                        buildUnit = bidCtx.GetBuildRegex();
                        if (string.IsNullOrEmpty(buildUnit))
                        {
                            buildUnit = bidCtx.Replace(" ", "").GetBuildRegex();
                        }
                        if (string.IsNullOrEmpty(buildUnit))
                        {
                            buildUnit = bidCtx.GetRegex("建 设 单 位");
                        }
                        prjAddress = bidCtx.GetAddressRegex();
                        if (string.IsNullOrEmpty(prjAddress))
                        {
                            prjAddress = bidCtx.Replace(" ", "").GetAddressRegex();
                        }
                        if (string.IsNullOrEmpty(prjAddress))
                        {
                            prjAddress = bidCtx.GetRegex("工 程 地 址");
                        }
                    }
                    specType = "建设工程";
                    msgType  = "深圳市建设工程交易中心";
                    if (Encoding.Default.GetByteCount(prjMgr) > 50)
                    {
                        prjMgr = "";
                    }
                    if (Encoding.Default.GetByteCount(bidUnit) > 150)
                    {
                        string[] bidUnits = bidUnit.Split(';');
                        bidUnit = bidUnits[0];
                    }
                    if (Encoding.Default.GetByteCount(bidUnit) > 150)
                    {
                        bidUnit = "";
                    }
                    if (prjMgr.Contains("----"))
                    {
                        prjMgr = "";
                    }
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳市工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, string.Empty, bidMoney, addUrl, prjMgr, HtmlTxt);
                    sqlCount++;
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        if (dicFile.Count > 0)
                        {
                            foreach (Dictionary <string, string> file in dicFile)
                            {
                                try
                                {
                                    BaseAttach item = ToolHtml.GetBaseAttach(file["Url"], file["Name"], info.Id, "SiteManage\\Files\\InviteAttach\\");
                                    if (item != null)
                                    {
                                        ToolDb.SaveEntity(item, "SourceID,AttachServerPath");
                                    }
                                }
                                catch { }
                            }
                        }
                    }
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                }
            }
            return(list);
        }
Пример #5
0
        /// <summary>
        /// Crawls the paginated list at <c>SiteUrl</c>, downloads each entry's detail page, stores it as a
        /// <c>NotifyInfo</c> record and then stores the images and attachment links found inside the
        /// detail content.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops once <c>MaxCount</c> records have been handed to
        /// <c>ToolDb.SaveEntity</c>; when true, every page is processed.
        /// </param>
        /// <returns>Always null; records are persisted through <c>ToolDb</c> rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.gzzb.gd.cn";
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return(null);
            }

            // Read the page count from the link at the last index of the matched cells.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "font9green2")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // Every '&' becomes a sentinel (and one is appended) so the value after "page="
                    // can be cut out whether or not it is the last query parameter.
                    string temp = pageList.GetATag(pageList.Count - 1).Link.Replace("&", "kdxx") + "kdxx";
                    temp    = temp.GetRegexBegEnd("page=", "kdxx").Replace("&amp;", "");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + i.ToString(), Encoding.Default).GetJsString();
                    }
                    catch { continue; } // a page that cannot be downloaded is skipped
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "font9grey1")));
                if (nodeList == null || nodeList.Count <= 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    // The matched node is not a table tag; there are no rows to read on this page.
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Cells 1 and 2 are read below; a shorter row would otherwise abort the whole crawl.
                        continue;
                    }

                    string infoType    = "办事指南";
                    string headName    = tr.Columns[1].ToNodePlainString();
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl     = siteRoot + tr.Columns[1].GetATagHref();
                    string htldtl      = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "contentDiv")));
                    if (dtlList == null || dtlList.Count <= 0)
                    {
                        continue;
                    }

                    string     ctxHtml = dtlList.AsHtml();
                    string     infoCtx = ctxHtml.ToCtxString();
                    string     msgType = MsgTypeCosnt.GuangZhouMsgType;
                    NotifyInfo info    = ToolDb.GenNotifyInfo(headName, releaseTime, string.Empty, msgType, infoUrl, ctxHtml, "广东省", "广州市区", string.Empty, infoCtx, infoType);
                    sqlCount++;

                    if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        // Images embedded in the detail content.
                        parser = new Parser(new Lexer(ctxHtml));
                        NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                        if (imgList != null && imgList.Count > 0)
                        {
                            for (int img = 0; img < imgList.Count; img++)
                            {
                                ImageTag imgTag = imgList[img] as ImageTag;
                                try
                                {
                                    string     src    = imgTag.GetAttribute("src");
                                    string     imgUrl = src.Contains("http") ? src : siteRoot + src;
                                    BaseAttach obj    = ToolHtml.GetBaseAttach(imgUrl, headName, info.Id);
                                    if (obj != null)
                                    {
                                        ToolDb.SaveEntity(obj, string.Empty);
                                    }
                                }
                                catch { } // a failure for one image is ignored so the remaining ones are still processed
                            }
                        }

                        // Attachment links in the detail content.
                        parser = new Parser(new Lexer(ctxHtml));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int a = 0; a < aNode.Count; a++)
                            {
                                ATag aTag = aNode[a] as ATag;
                                if (aTag == null || !aTag.IsAtagAttach())
                                {
                                    continue;
                                }
                                try
                                {
                                    string     link      = aTag.Link;
                                    string     attachUrl = link.Contains("http") ? link : siteRoot + link;
                                    BaseAttach obj       = ToolHtml.GetBaseAttach(attachUrl, aTag.LinkText, info.Id);
                                    if (obj != null)
                                    {
                                        ToolDb.SaveEntity(obj, string.Empty);
                                    }
                                }
                                catch { } // a failure for one attachment is ignored so the remaining ones are still processed
                            }
                        }
                    }

                    // The limit is checked only after the record has been saved, so the MaxCount-th
                    // record is stored instead of being dropped.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                }
            }
            return(null);
        }
Пример #6
0
        /// <summary>
        /// Crawls the listing rendered in the <c>GridView1</c> table at <c>SiteUrl</c>. Pages after the first
        /// are requested by posting the form again with the view-state and event-validation values taken
        /// from the previously downloaded page. Each row's detail page is downloaded, saved as a
        /// <c>NotifyInfo</c>, and every link in its content whose text does not contain "返回" is then
        /// passed to <c>ToolHtml.GetBaseAttach</c> and saved when a result comes back.
        /// </summary>
        /// <param name="crawlAll">When false, at most <c>MaxCount</c> records are passed to <c>ToolDb.SaveEntity</c>.</param>
        /// <returns>Always null; records are persisted through <c>ToolDb</c> rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    totalPages = 1;
            int    handled    = 0;
            string pageHtml   = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return null;
            }

            // Page count: column count of the first row of the table nested inside GridView1, plus one.
            Parser   parser      = new Parser(new Lexer(pageHtml));
            NodeList pagerTables = parser.ExtractAllNodesThatMatch(
                new AndFilter(
                    new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")), true),
                    new TagNameFilter("table")));

            if (pagerTables != null && pagerTables.Count > 0)
            {
                try
                {
                    TableTag pager = pagerTables[0] as TableTag;
                    totalPages = pager.Rows[0].ColumnCount + 1;
                }
                catch
                {
                    totalPages = 1;
                }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                if (pageNo > 1)
                {
                    try
                    {
                        string viewState       = this.ToolWebSite.GetAspNetViewState(pageHtml);
                        string eventValidation = this.ToolWebSite.GetAspNetEventValidation(pageHtml);

                        string[] fieldNames = new string[]
                        {
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__VIEWSTATE",
                            "__VIEWSTATEENCRYPTED",
                            "__EVENTVALIDATION",
                            "sel",
                            "beginDate",
                            "endDate",
                            "infotitle"
                        };
                        string[] fieldValues = new string[]
                        {
                            "GridView1",
                            "Page$" + pageNo.ToString(),
                            viewState,
                            "",
                            eventValidation,
                            "1",
                            "",
                            "",
                            ""
                        };

                        NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(pageHtml));
                NodeList grids = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                if (grids == null || grids.Count <= 0)
                {
                    continue;
                }

                TableTag grid = grids[0] as TableTag;

                // The first and the last row of the grid are not visited.
                for (int rowIndex = 1; rowIndex < grid.RowCount - 1; rowIndex++)
                {
                    TableRow row = grid.Rows[rowIndex];

                    string title     = row.Columns[1].ToNodePlainString();
                    string published = row.Columns[3].ToPlainTextString().GetDateRegex();
                    if (string.IsNullOrEmpty(published))
                    {
                        published = row.Columns[3].ToPlainTextString().GetDateRegex("yyyy/MM/dd");
                    }
                    string source    = row.Columns[2].ToNodePlainString();
                    string category  = "通知公告";
                    string detailUrl = "http://www.szjsjy.com.cn/Notify/" + row.Columns[1].GetATagHref();

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList content = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("background", "../img/A-3_17.gif")));
                    if (content == null || content.Count <= 0)
                    {
                        continue;
                    }

                    string contentHtml = content.AsHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");

                    // Plain-text version: strip blanks and entities, drop remaining tags, then squeeze blank lines.
                    string contentText = content.AsString().Replace(" ", "").Replace("&nbsp;", "").Replace("\t\t", "\t").Replace("\t\t", "\t");
                    contentText = Regex.Replace(contentText, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
                    contentText = contentText.Replace(" ", "").Replace("\t", "");
                    contentText = contentText.Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                    string msgType = MsgTypeCosnt.ShenZhenMsgType;
                    source = source.Replace("&nbsp;", "");

                    NotifyInfo info = ToolDb.GenNotifyInfo(title, published, source, msgType, detailUrl, contentHtml, "广东省", "深圳市工程", string.Empty, contentText, category);

                    // Stop once the configured number of records has already been processed.
                    if (!crawlAll && handled >= this.MaxCount)
                    {
                        return null;
                    }
                    handled++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(contentHtml));
                    NodeFilter anchorFilter = new TagNameFilter("a");
                    NodeList   anchors      = parser.ExtractAllNodesThatMatch(anchorFilter);
                    if (anchors == null || anchors.Count <= 0)
                    {
                        continue;
                    }

                    for (int k = 0; k < anchors.Count; k++)
                    {
                        ATag anchor = anchors[k].GetATag();
                        if (anchor == null || anchor.LinkText.Contains("返回"))
                        {
                            continue;
                        }

                        try
                        {
                            string     attachUrl = "http://www.szjsjy.com.cn/" + anchor.Link.Replace("../", "");
                            BaseAttach attach    = ToolHtml.GetBaseAttach(attachUrl, anchor.LinkText, info.Id);
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, string.Empty);
                            }
                        }
                        catch
                        {
                            // A failure for one link is ignored; the remaining links are still processed.
                        }
                    }
                }
            }
            return null;
        }
Пример #7
0
        /// <summary>
        /// Crawls the paginated list at <c>SiteUrl</c>: reads the page total from the text between ",共" and
        /// "页", walks the rows of the second table with <c>width="95%"</c>, downloads each row's detail page,
        /// saves it as a <c>NotifyInfo</c> and then saves the attachment links found in the detail page.
        /// </summary>
        /// <param name="crawlAll">When false, at most <c>MaxCount</c> records are passed to <c>ToolDb.SaveEntity</c>.</param>
        /// <returns>Always null; records are persisted through <c>ToolDb</c> rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string host = "http://market.meizhou.gov.cn";

            int    lastPage = 1;
            int    handled  = 0;
            string listHtml = string.Empty;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch (Exception)
            {
                return null;
            }

            Parser   parser     = new Parser(new Lexer(listHtml));
            NodeList pagerCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "28")));

            if (pagerCells != null && pagerCells.Count > 0)
            {
                try
                {
                    lastPage = Convert.ToInt32(pagerCells.AsString().GetRegexBegEnd(",共", "页"));
                }
                catch
                {
                    lastPage = 1;
                }
            }

            for (int page = 1; page <= lastPage; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&pageNum=" + page.ToString(), Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(listHtml));
                NodeList tables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "95%")));
                if (tables == null || tables.Count <= 1)
                {
                    continue;
                }

                // The listing is read from the second matching table.
                TableTag grid = tables[1] as TableTag;
                for (int rowIndex = 0; rowIndex < grid.RowCount; rowIndex++)
                {
                    TableRow row = grid.Rows[rowIndex];

                    string category  = "政策法规";
                    string title     = row.Columns[0].ToNodePlainString();
                    string published = row.Columns[1].ToPlainTextString().GetDateRegex();
                    // The detail path is cut out of the link's onclick attribute, between ",'" and "',".
                    string detailUrl = host + row.Columns[0].GetATagValue("onclick").GetRegexBegEnd(",'", "',");

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList centered = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                    if (centered == null || centered.Count <= 0)
                    {
                        continue;
                    }

                    // Use the second centered table when there is one, otherwise everything that matched.
                    string contentHtml = centered.Count > 1 ? centered[1].ToHtml() : centered.ToHtml();
                    string contentText = contentHtml.ToCtxString().Replace("&gt;", "");
                    string msgType     = MsgTypeCosnt.MeiZhouMsgType;

                    NotifyInfo info = ToolDb.GenNotifyInfo(title, published, string.Empty, msgType, detailUrl, contentHtml, "广东省", "梅州市区", string.Empty, contentText, category);

                    // Stop once the configured number of records has already been processed.
                    if (!crawlAll && handled >= this.MaxCount)
                    {
                        return null;
                    }
                    handled++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Attachments are searched in the whole detail page, not only in the extracted content.
                    parser = new Parser(new Lexer(detailHtml));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors == null || anchors.Count <= 0)
                    {
                        continue;
                    }

                    for (int k = 0; k < anchors.Count; k++)
                    {
                        ATag anchor = anchors[k] as ATag;
                        if (!anchor.IsAtagAttach())
                        {
                            continue;
                        }

                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttach(host + anchor.Link, anchor.LinkText, info.Id);
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, string.Empty);
                            }
                        }
                        catch
                        {
                            // A failure for one attachment is ignored; the remaining ones are still processed.
                        }
                    }
                }
            }
            return null;
        }
Пример #8
0
        /// <summary>
        /// Downloads the JSON listing from <c>SiteUrl + MaxCount</c>, fetches the detail HTML for every entry
        /// of its "rows" array, saves each entry as an <c>InviteInfo</c> and then saves the absolute
        /// attachment links found in that HTML.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> rows have been processed.</param>
        /// <returns>
        /// Null when the listing cannot be downloaded or does not contain a JSON object with a "rows" array;
        /// otherwise the result list, which this method never populates because records are persisted
        /// through <c>ToolDb</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List<InviteInfo>();
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }

            if (string.IsNullOrEmpty(html))
            {
                return(null);
            }

            // Keep only the text from the first '{' to the last '}'.
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");
            if (startIndex < 0 || endIndex < startIndex)
            {
                // No JSON object in the response; Substring would throw ArgumentOutOfRangeException here.
                return(null);
            }
            html = html.Substring(startIndex, (endIndex + 1) - startIndex);

            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = serializer.DeserializeObject(html) as Dictionary <string, object>;
            object rowsValue = null;
            if (smsTypeJson == null || !smsTypeJson.TryGetValue("rows", out rowsValue))
            {
                return(null);
            }
            object[] objvalues = rowsValue as object[];
            if (objvalues == null)
            {
                return(null);
            }

            foreach (object objValue in objvalues)
            {
                Dictionary <string, object> dic = objValue as Dictionary <string, object>;
                if (dic == null)
                {
                    continue;
                }

                string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                code    = Convert.ToString(dic["gcBH"]);
                prjName = Convert.ToString(dic["gcName"]);

                beginDate = Convert.ToString(dic["ggStartTime2"]).GetDateRegex();
                string end = Convert.ToString(dic["ggEndTime"]);
                try
                {
                    endDate = ToolHtml.GetDateTimeByLong(Convert.ToInt64(end)).ToString();
                }
                catch { } // endDate stays empty when the value cannot be converted
                inviteType = Convert.ToString(dic["gcLeiXing2"]);
                InfoUrl    = Convert.ToString(dic["detailUrl"]);

                try
                {
                    string oldDataUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=1&id=" + dic["gcGuid"];
                    try
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(oldDataUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    }
                    catch { } // a failure here falls through to the announcement endpoint below

                    // Only ask the announcement endpoint when the first download produced nothing;
                    // a successful first download is reused instead of requesting the same URL again.
                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        string noticeUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/showGongGao.do?ggGuid=" + dic["ggGuid"];
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(noticeUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    }
                    HtmlTxt = HtmlTxt.GetReplace("},{,maoDian:,html:");

                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(oldDataUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    }
                }
                catch
                {
                    // The detail page could not be obtained; skip this row.
                    continue;
                }
                inviteCtx = HtmlTxt.Replace("</span>", "\r\n").Replace("<br />", "\r\n").Replace("<BR>", "\r\n").Replace("<br/>", "\r\n").ToCtxString();

                prjAddress = inviteCtx.GetAddressRegex();
                buildUnit  = inviteCtx.GetBuildRegex();
                if (string.IsNullOrEmpty(code))
                {
                    code = inviteCtx.GetCodeRegex();
                }
                msgType  = "深圳市建设工程交易中心宝安分中心";
                specType = "建设工程";

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                sqlCount++;

                if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                {
                    Parser   parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // Only links containing "http" are downloaded.
                            if (!a.Link.ToLower().Contains("http"))
                            {
                                continue;
                            }

                            string     link   = a.Link.Replace("\\", "");
                            BaseAttach attach = null;
                            try
                            {
                                attach = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            }
                            catch { } // a failure for one attachment is ignored; the remaining ones are still processed
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "");
                            }
                        }
                    }
                }
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return(list);
                }
            }
            return(list);
        }
Пример #9
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 31;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }

            int      startIndex = html.IndexOf("<xml");
            int      endIndex   = html.IndexOf("</xml>");
            string   xmlstr     = html.Substring(startIndex, endIndex - startIndex).ToLower().GetReplace("infourl", "span").GetReplace("info", "div").GetReplace("publishedtime", "p");
            Parser   parser     = new Parser(new Lexer(xmlstr));
            NodeList pageNode   = parser.ExtractAllNodesThatMatch(new TagNameFilter("div"));

            if (pageNode != null && pageNode.Count > 0)
            {
                for (int i = 0; i < pageNode.Count; i++)
                {
                    string prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;
                    parser = new Parser(new Lexer(pageNode[i].ToHtml()));
                    NodeList dateNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("p"));
                    beginDate = dateNode[0].ToPlainTextString().GetDateRegex();
                    parser.Reset();
                    NodeList urlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("span"));
                    InfoUrl = "http://jingtai.by.gov.cn/publicfiles/business/htmlfiles/" + urlNode[0].ToPlainTextString();
                    parser.Reset();
                    NodeList prjNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("title"));
                    prjName = prjNode[0].ToNodePlainString();
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoomcon")));
                    if (dtl != null && dtl.Count > 0)
                    {
                        HtmlTxt = dtl.AsHtml();
                        if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果"))
                        {
                            string buildUnit = string.Empty, bidUnit = string.Empty,
                                   bidMoney = string.Empty, code = string.Empty,
                                   bidDate = string.Empty,
                                   endDate = string.Empty, bidType = string.Empty,
                                   specType = string.Empty,
                                   msgType = string.Empty, bidCtx = string.Empty,
                                   prjAddress = string.Empty, remark = string.Empty,
                                   prjMgr = string.Empty, otherType = string.Empty;
                            Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                            NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                            string   src       = string.Empty;
                            if (imgNode != null && imgNode.Count > 0)
                            {
                                string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                src     = "http://jingtai.by.gov.cn/" + imgUrl;
                                HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                            }
                            bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                            code      = bidCtx.GetCodeRegex().GetCodeDel();
                            buildUnit = bidCtx.GetBuildRegex();
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }

                            bidUnit = bidCtx.GetBidRegex();
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegexBegEnd("招标人确定", "单位");
                            }
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegexBegEnd("确认", "为中标");
                            }
                            if (bidUnit.Contains("公司"))
                            {
                                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                            }
                            bidMoney = bidCtx.GetMoneyRegex(null, false, "万元");
                            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = bidCtx.GetMoneyRegex(null, false, "万元", 100, ";");
                            }
                            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = bidCtx.GetMoneyRegex(new string[] { "投标总报价" }, false, "万元", 100, ";");
                            }
                            bidUnit = bidUnit.GetReplace("名称");
                            try
                            {
                                if (decimal.Parse(bidMoney) > 100000)
                                {
                                    bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                }
                            }
                            catch { }
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = "广州市白云区景泰街道办事处";
                            }
                            msgType  = "广州市白云区景泰街道办事处";
                            specType = "政府采购";
                            bidType  = prjName.GetInviteBidType();
                            BidInfo info = ToolDb.GenBidInfo("广东省", "广州政府采购", "白云区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!string.IsNullOrEmpty(src))
                            {
                                string sql = string.Format("select Id from BidInfo where InfoUrl='{0}'", info.InfoUrl);
                                object obj = ToolDb.ExecuteScalar(sql);
                                if (obj == null || obj.ToString() == "")
                                {
                                    try
                                    {
                                        BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                        if (attach != null)
                                        {
                                            ToolDb.SaveEntity(attach, "");
                                        }
                                    }
                                    catch { }
                                }
                            }
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k].GetATag();
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://jingtai.by.gov.cn/" + a.Link.GetReplace("./");
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                        else
                        {
                            string code = string.Empty, buildUnit = string.Empty,
                                   prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                   specType = string.Empty, endDate = string.Empty,
                                   remark = string.Empty, inviteCon = string.Empty,
                                   CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty;

                            inviteCtx  = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                            inviteType = prjName.GetInviteBidType();

                            Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                            NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                            string   src       = string.Empty;
                            if (imgNode != null && imgNode.Count > 0)
                            {
                                string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                src     = "http://jingtai.by.gov.cn/" + imgUrl;
                                HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                            }
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = "广州市白云区景泰街道办事处";
                            }

                            msgType  = "广州市白云区景泰街道办事处";
                            specType = "政府采购";

                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "白云区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!string.IsNullOrEmpty(src))
                            {
                                string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", info.InfoUrl);
                                object obj = ToolDb.ExecuteScalar(sql);
                                if (obj == null || obj.ToString() == "")
                                {
                                    try
                                    {
                                        BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                        if (attach != null)
                                        {
                                            ToolDb.SaveEntity(attach, "");
                                        }
                                    }
                                    catch { }
                                }
                            }
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k].GetATag();
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://jingtai.by.gov.cn/" + a.Link.GetReplace("./");
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }

            return(list);
        }
Пример #10
0
        /// <summary>
        /// Walks every page of the notice list at <c>SiteUrl</c>, downloads the detail page of each
        /// listed notice, saves it as a <c>NotifyInfo</c> and then saves the attachments referenced
        /// in the notice body (attachment links if the body has any anchors, otherwise its images).
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> notices have been generated.
        /// </param>
        /// <returns>
        /// The (always empty) list when the first page cannot be fetched or when every page has been
        /// processed - entities are written through <c>ToolDb.SaveEntity</c> and never added to it;
        /// <c>null</c> when the <c>MaxCount</c> limit ends the crawl early.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <NotifyInfo>();
            int    pageInt = 1, sqlCount = 0;
            string html      = string.Empty;
            string viewState = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text carries the total page count between "总页数" and "当前".
                    string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("总页数", "当前").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch { } // Unparseable pager: fall back to crawling a single page.
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting back the pager event together
                    // with the view state taken from the previously loaded page.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    },
                        new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                for (int j = 0; j < table.RowCount; j++)
                {
                    string   headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty;
                    TableRow tr = table.Rows[j];
                    // Columns 1 and 2 are read below; skip rows that do not have them instead of
                    // letting an index error abort the whole crawl.
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    headName    = aTag.GetAttribute("title");
                    releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    infoUrl     = "http://www.gaxqjyzx.com" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }
                    ctxHtml = dtlNode.AsHtml();
                    infoCtx = ctxHtml.ToCtxString();

                    msgType = "贵安新区公共资源交易中心";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "贵州省", "贵州省及地市", "贵安新区", infoCtx, "通知公告");
                    sqlCount++;
                    // The limit is checked before saving, so the notice that reaches MaxCount is
                    // not persisted; null (not the list) is returned in that case.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag fileATag = aNode[k].GetATag();
                            if (fileATag.IsAtagAttach())
                            {
                                BaseAttach obj = null;
                                try
                                {
                                    if (fileATag.Link.ToLower().Contains("http"))
                                    {
                                        obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                                    }
                                    else
                                    {
                                        obj = ToolHtml.GetBaseAttach("http://www.gaxqjyzx.com" + fileATag.Link, headName, info.Id);
                                    }
                                }
                                catch { } // Best effort: a failed attachment must not stop the crawl.
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                        }
                    }
                    else
                    {
                        // No anchors in the body: fall back to saving its images as attachments.
                        parser.Reset();
                        NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                        if (imgNode != null && imgNode.Count > 0)
                        {
                            for (int k = 0; k < imgNode.Count; k++)
                            {
                                // Fixed: index with k. The previous code always read imgNode[0], so
                                // the first image was saved repeatedly and the others were lost.
                                ImageTag   img = imgNode[k] as ImageTag;
                                BaseAttach obj = null;
                                try
                                {
                                    if (img.ImageURL.ToLower().Contains("http"))
                                    {
                                        obj = ToolHtml.GetBaseAttach(img.ImageURL, headName, info.Id);
                                    }
                                    else
                                    {
                                        obj = ToolHtml.GetBaseAttach("http://www.gaxqjyzx.com" + img.ImageURL, headName, info.Id);
                                    }
                                }
                                catch { } // Best effort: a failed image must not stop the crawl.
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Пример #11
0
        /// <summary>
        /// Walks every page of the notice list at <c>SiteUrl</c>, downloads the detail page of each
        /// listed notice, rewrites the image URLs in its body to absolute ones, saves it as a
        /// <c>NotifyInfo</c> and then saves the body's images and attachment links.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops once <c>MaxCount</c> notices have been processed.
        /// </param>
        /// <returns>
        /// The (always empty) list when the first page cannot be fetched or when every page has been
        /// processed - entities are written through <c>ToolDb.SaveEntity</c> and never added to it;
        /// <c>null</c> when the <c>MaxCount</c> limit ends the crawl early.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <NotifyInfo>();
            int    pageInt = 1, sqlCount = 0;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string cookiestr       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "gridview_PagerRow")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The total page count is the text between "/" and "&nbsp" in the pager row.
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "&nbsp");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Unparseable pager: fall back to crawling a single page.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the form back with the view
                    // state and event validation taken from the previously loaded page.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "__EVENTTARGET",
                                       "__EVENTARGUMENT",
                                       "__LASTFOCUS",
                                       "__VIEWSTATE",
                                       "__VIEWSTATEGENERATOR",
                                       "__EVENTVALIDATION",
                                       "keyTextBox",
                                       "PagerControl1:_ctl4",
                                       "PagerControl1:_ctl2.x",
                                       "PagerControl1:_ctl2.y" },
                        new string[] {
                        "", "", "",
                        viewState,
                        "7CE136E4",
                        eventValidation,
                        "",
                        "",
                        "3", "5"
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MyGridView1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                // Row 0 is skipped (presumably the grid header - confirm against the page).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty;

                    TableRow tr = table.Rows[j];
                    // Columns 1 and 2 are read below; skip rows that do not have them instead of
                    // letting an index error abort the whole crawl.
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    headName    = aTag.LinkText;
                    releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    infoUrl     = aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { Logger.Error(headName); Logger.Error(pageInt); continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "PopupBody_context")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }
                    // Titles longer than 200 bytes are cut to 100 characters. The length guard
                    // prevents Substring from throwing when a title is over 200 bytes but shorter
                    // than 100 characters (possible with encodings above 2 bytes per character).
                    if (headName.Length > 100 && Encoding.Default.GetByteCount(headName) > 200)
                    {
                        headName = headName.Substring(0, 100);
                    }
                    ctxHtml = dtlNode.AsHtml();
                    infoCtx = ctxHtml.ToCtxString();
                    List <string> listImg = new List <string>();
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgNode != null && imgNode.Count > 0)
                    {
                        for (int m = 0; m < imgNode.Count; m++)
                        {
                            // Make each image URL absolute, both for download and in the stored HTML.
                            string link = "http://publish.bcactc.com" + (imgNode[m] as ImageTag).ImageURL;
                            listImg.Add(link);
                            ctxHtml = ctxHtml.GetReplace((imgNode[m] as ImageTag).ImageURL, link);
                        }
                    }

                    msgType = "北京市建设工程发包承包交易中心";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "北京市", "北京市区", "", infoCtx, "通知公告");
                    sqlCount++;
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        // Fixed: save every collected image. The previous index loop always passed
                        // listImg[0], so the first image was saved repeatedly and the rest were lost.
                        foreach (string imgLink in listImg)
                        {
                            try
                            {
                                BaseAttach entity = ToolHtml.GetBaseAttach(imgLink, headName, info.Id);
                                if (entity != null)
                                {
                                    ToolDb.SaveEntity(entity, string.Empty);
                                }
                            }
                            catch { } // Best effort: a failed image must not stop the crawl.
                        }
                        parser = new Parser(new Lexer(ctxHtml));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag fileATag = aNode[k].GetATag();
                                if (fileATag.IsAtagAttach())
                                {
                                    BaseAttach obj = null;
                                    try
                                    {
                                        if (fileATag.Link.ToLower().Contains("http"))
                                        {
                                            obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                                        }
                                        else
                                        {
                                            obj = ToolHtml.GetBaseAttach("http://publish.bcactc.com/" + fileATag.Link, headName, info.Id);
                                        }
                                    }
                                    catch { } // Best effort: a failed attachment must not stop the crawl.
                                    if (obj != null)
                                    {
                                        ToolDb.SaveEntity(obj, string.Empty);
                                    }
                                }
                            }
                        }
                    }
                    // Checked after saving, so the notice that reaches MaxCount is still persisted;
                    // null (not the list) is returned in that case.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                }
            }
            return(list);
        }
Пример #12
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List <InviteInfo>();
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (MaxCount + 20));
            }
            catch { return(null); }
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }
                object[] array = (object[])obj.Value;
                foreach (object arrValue in array)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty,
                                inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, HtmlTxt = string.Empty;
                    Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                    code    = Convert.ToString(dic["gcBH"]);
                    prjName = Convert.ToString(dic["gcName"]);

                    inviteType = Convert.ToString(dic["gcLeiXing2"]);

                    beginDate = Convert.ToString(dic["ggStartTime2"]);
                    string addUrl = Convert.ToString(dic["detailUrl"]);
                    //https://www.szjsjy.com.cn:8001/jyw/showGongGao.do?ggGuid=03fb1287-935e-4e39-ab1a-35423a81928a&gcbh=&bdbhs=
                    InfoUrl = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=1&id=" + Convert.ToString(dic["ggGuid"]);
                    try
                    {
                        try
                        {
                            HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                        }
                        catch {}
                        if (string.IsNullOrEmpty(HtmlTxt))
                        {
                            HtmlTxt = this.ToolWebSite.GetHtmlByUrl("https://www.szjsjy.com.cn:8001/jyw/showGongGao.do?ggGuid=" + Convert.ToString(dic["ggGuid"])).GetJsString().GetReplace("\\t,\\r,\\n,\",{maoDian:,}");
                            Parser   dtlparser = new Parser(new Lexer(HtmlTxt));
                            NodeList dtlNode   = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("table"));//(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zbgk")));
                            if (dtlNode != null && dtlNode.Count > 0)
                            {
                                inviteCtx = string.Empty;
                                HtmlTxt   = dtlNode.AsHtml();
                                for (int j = 0; j < dtlNode.Count; j++)
                                {
                                    TableTag table = dtlNode[j] as TableTag;
                                    for (int r = 0; r < table.RowCount; r++)
                                    {
                                        for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                        {
                                            string temp = table.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:");
                                            if ((c + 1) % 2 == 0)
                                            {
                                                inviteCtx += temp + "\r\n";
                                            }
                                            else
                                            {
                                                inviteCtx += temp + ":";
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                    catch { continue; }
                    if (string.IsNullOrEmpty(inviteCtx))
                    {
                        inviteCtx = HtmlTxt.GetReplace("<br />,<br/>,</ br>,</br>", "\r\n").ToCtxString() + "\r\n";
                    }
                    buildUnit = inviteCtx.GetBuildRegex();
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = inviteCtx.Replace(" ", "").GetBuildRegex();
                    }
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = inviteCtx.GetRegex("建 设 单 位");
                    }
                    specType   = "建设工程";
                    prjAddress = inviteCtx.GetAddressRegex();
                    if (string.IsNullOrEmpty(prjAddress))
                    {
                        prjAddress = inviteCtx.Replace(" ", "").GetAddressRegex();
                    }
                    if (string.IsNullOrEmpty(prjAddress))
                    {
                        prjAddress = inviteCtx.GetRegex("工 程 地 址");
                    }
                    msgType = "深圳市建设工程交易中心";
                    #region 2013-11-19修改
                    Dictionary <string, Regex> dicRegex = new Dictionary <string, Regex>();
                    dicRegex.Add("重要提示", new Regex(@"([.\S\s]*)(?=重要提示)"));
                    dicRegex.Add("温馨提示", new Regex(@"([.\S\s]*)(?=温馨提示)"));
                    foreach (string dicValue in dicRegex.Keys)
                    {
                        if (inviteCtx.Contains(dicValue))
                        {
                            inviteCtx = dicRegex[dicValue].Match(inviteCtx).Value;
                        }
                    }
                    #endregion

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳市工程", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, addUrl, HtmlTxt);
                    sqlCount++;
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        Parser   parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int a = 0; a < aNode.Count; a++)
                            {
                                ATag aTag = aNode[a].GetATag();
                                if (aTag.Link.Contains("download"))
                                {
                                    try
                                    {
                                        BaseAttach attach = ToolHtml.GetBaseAttach(aTag.Link, aTag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                        if (attach != null)
                                        {
                                            ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                        }
                                    }
                                    catch
                                    {
                                    }
                                }
                            }
                        }
                    }
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                }
            }
            return(list);
        }
Пример #13
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, downloads the detail page of every list
        /// row and saves it as a <c>NotifyInfo</c> (info type "政策法规"), followed by any
        /// attachment links found in the detail page's "Table1" table.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> entries have been generated.
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted through <c>ToolDb.SaveEntity</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.sgjsj.gov.cn/sgwebims/";

            int    pageInt = 1, sqlCount = 0;
            string html      = string.Empty;
            string viewState = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return(null);
            }

            // Determine the number of pages from the pager element; fall back to a single page.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dataPager")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList.AsString().GetRegexBegEnd("共有:", "页");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the pager event back
                    // together with the view state taken from the previously loaded page.
                    try
                    {
                        viewState = this.ToolWebSite.GetAspNetViewState(html);
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                            new string[] {
                            "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "searcher:txtKeyWord",
                            "searcher:tcInputDateTime:txtDateTime1", "searcher:tcInputDateTime:txtDateTime2",
                            "searcher:ddlProvince", "searcher:ddlCity1", "searcher:ddlCity2"
                        },
                            new string[] {
                            "dataPager", i.ToString(), viewState, "", "", "", "-1", "-1", "-1"
                        }
                            );
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default).GetJsString();
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "p3")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                // Prefer the second matching table when more than one is present.
                TableTag table = (nodeList.Count > 1 ? nodeList[1] : nodeList[0]) as TableTag;
                if (table == null)
                {
                    // The matched node is not a table tag; skip this page instead of crashing.
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

                    TableRow tr = table.Rows[j];
                    // Both the title cell and the date cell are read below; a shorter row
                    // would otherwise abort the whole crawl with an index error.
                    if (tr == null || tr.ColumnCount < 2)
                    {
                        continue;
                    }

                    infoType    = "政策法规";
                    headName    = tr.Columns[0].ToNodePlainString();
                    releaseTime = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    // The relative detail URL is the argument of the anchor's onclick call:
                    // the parentheses are swapped for markers so the text between them can be cut out.
                    infoUrl     = siteRoot + tr.Columns[0].GetATagValue("onclick").Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk").Replace("\"", "");

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table4")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    ctxHtml = dtlList.AsHtml();
                    infoCtx = ctxHtml.ToCtxString();
                    msgType = MsgTypeCosnt.ShaoGuanMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "韶关市区", string.Empty, infoCtx, infoType);
                    sqlCount++;
                    // NOTE(review): the limit is checked before the save, so the entry that
                    // reaches MaxCount is not persisted — confirm this is intended.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Collect attachment anchors from "Table1": the second match when there
                    // are several, otherwise all matches together.
                    parser = new Parser(new Lexer(htldtl));
                    NodeList tabNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table1")));
                    NodeList aNode   = null;
                    if (tabNode != null && tabNode.Count > 1)
                    {
                        parser = new Parser(new Lexer(tabNode[1].ToHtml()));
                        aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    }
                    else if (tabNode != null && tabNode.Count > 0)
                    {
                        parser = new Parser(new Lexer(tabNode.AsHtml()));
                        aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    }
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag aTag = aNode[a] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }
                        try
                        {
                            BaseAttach obj = ToolHtml.GetBaseAttach(siteRoot + aTag.Link.Replace("../", "").Replace("./", ""), aTag.LinkText, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                        catch
                        {
                            // Attachments are best-effort: a failed download must not stop the crawl.
                        }
                    }
                }
            }
            return(null);
        }
Пример #14
0
        /// <summary>
        /// Crawls the paged announcement list at <c>SiteUrl</c>, downloads the detail page of
        /// every list entry, saves it as a <c>NotifyInfo</c> (info type "通知公告") and then
        /// stores the attachments linked from the detail content.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> entries have been generated.
        /// </param>
        /// <returns>
        /// An empty list when the crawl runs to completion or the first page cannot be fetched;
        /// <c>null</c> when the <c>MaxCount</c> limit stops the crawl early.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <NotifyInfo>();
            int    pageInt = 1, sqlCount = 0;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }

            // Read the page count from the pager table; keep the default of one page on failure.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "style1")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "99%")), true), new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%"))));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    // Each matched table is one list entry; its first row carries the link.
                    TableTag rowTable = listNode[j] as TableTag;
                    if (rowTable == null || rowTable.RowCount == 0)
                    {
                        continue;
                    }
                    TableRow tr = rowTable.Rows[0];
                    string   headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;
                    ATag     aTag = tr.GetATag();
                    if (aTag == null)
                    {
                        // No anchor in this row, so there is no detail page to follow.
                        continue;
                    }

                    headName = aTag.LinkText;
                    // Over-long titles are cut to 99 characters. The length check keeps
                    // Substring from throwing when the byte count is large but the
                    // character count is small (multi-byte default encodings).
                    if (Encoding.Default.GetByteCount(headName) > 200 && headName.Length > 99)
                    {
                        headName = headName.Substring(0, 99);
                    }
                    infoUrl = "http://www.hnsztb.com.cn/gsgg/" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "800")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }
                    TableTag dtlTable = dtlNode[0] as TableTag;
                    if (dtlTable == null)
                    {
                        continue;
                    }

                    // Use the second row as the content when the table has one, else the whole table.
                    if (dtlTable.RowCount > 1)
                    {
                        ctxHtml = dtlTable.Rows[1].ToHtml();
                    }
                    else
                    {
                        ctxHtml = dtlTable.ToHtml();
                    }
                    infoCtx = ctxHtml.ToCtxString();

                    // Try the date extractors in turn until one yields a value.
                    releaseTime = infoCtx.GetDateRegex();
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetDateRegex("yyyy年MM月dd日");
                    }
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetDateRegex("yyyy/MM/dd");
                    }
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetChinaTime();
                    }

                    msgType  = "河南省建设工程招标投标协会";
                    infoType = "通知公告";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "河南省", "河南省及地市", string.Empty, infoCtx, infoType);
                    sqlCount++;
                    // The limit applies only to partial crawls (the condition used to be
                    // inverted, capping full crawls and leaving partial ones unbounded).
                    // NOTE(review): the check runs before the save, so the entry that
                    // reaches MaxCount is not persisted — confirm this is intended.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k].GetATag();
                        if (a == null || !a.IsAtagAttach())
                        {
                            continue;
                        }

                        // Absolute links are used as-is; relative ones are rooted at the site.
                        // The attachment's own link is used here, not the list entry's link.
                        string link = string.Empty;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://www.hnsztb.com.cn/" + a.Link;
                        }

                        BaseAttach entity = null;
                        try
                        {
                            entity = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id);
                            if (entity == null)
                            {
                                entity = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                            }
                            if (entity != null)
                            {
                                ToolDb.SaveEntity(entity, string.Empty);
                            }
                        }
                        catch
                        {
                            // Attachments are best-effort: a failed download must not stop the crawl.
                        }
                    }
                }
            }
            return(list);
        }
Пример #15
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int   sqlCount = 0;
            IList list     = new List <BidInfo>();

            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }
                object[] array = (object[])obj.Value;
                foreach (object arrValue in array)
                {
                    string prjName = string.Empty,
                                buildUnit = string.Empty, bidUnit = string.Empty,
                                bidMoney = string.Empty, code = string.Empty,
                                bidDate = string.Empty,
                                beginDate = string.Empty,
                                endDate = string.Empty, bidType = string.Empty,
                                specType = string.Empty, InfoUrl = string.Empty,
                                msgType = string.Empty, bidCtx = string.Empty,
                                prjAddress = string.Empty, remark = string.Empty,
                                prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;


                    Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                    code      = Convert.ToString(dic["bdBH"]);
                    prjName   = Convert.ToString(dic["bdName"]);
                    beginDate = Convert.ToString(dic["fabuTime2"]);
                    string saveUrl = Convert.ToString(dic["detailUrl"]);
                    //if (!prjName.Contains("一片一路一街一景"))
                    //{
                    //    continue;
                    //}
                    InfoUrl = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryOldOTDataDetail.do?type=4&id=" + dic["bdBH"];

                    List <Dictionary <string, object> > listAttachs = new List <Dictionary <string, object> >();
                    bool isJson = false;
                    try
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                        if (string.IsNullOrEmpty(HtmlTxt))
                        {
                            isJson = true;
                            string url     = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryZbgs.do?guid=" + dic["dbZhongBiaoJieGuoGuid"] + "&ggGuid=&bdGuid=";
                            string htmldtl = this.ToolWebSite.GetHtmlByUrl(url);

                            Dictionary <string, object> dtlJsons = (Dictionary <string, object>)serializer.DeserializeObject(htmldtl);

                            buildUnit = Convert.ToString(dtlJsons["zbrAndLht"]);
                            bidUnit   = Convert.ToString(dtlJsons["tbrName"]);
                            bidMoney  = Convert.ToString(dtlJsons["zhongBiaoJE"]);
                            try
                            {
                                bidMoney = (decimal.Parse(bidMoney) / 1000000).ToString();
                            }
                            catch { }
                            prjMgr = Convert.ToString(dtlJsons["xiangMuJiLi"]);

                            Dictionary <string, object> gg = null;
                            try
                            {
                                gg = dtlJsons["gg"] as Dictionary <string, object>;
                            }
                            catch { }
                            Dictionary <string, object> bd = null;
                            Dictionary <string, object> gc = null;
                            Dictionary <string, object> xm = null;
                            try
                            {
                                bd = dtlJsons["bd"] as Dictionary <string, object>;
                            }
                            catch { }
                            try
                            {
                                gc = bd["gc"] as Dictionary <string, object>;
                            }
                            catch { }
                            try
                            {
                                xm = bd["xm"] as Dictionary <string, object>;
                            }
                            catch { }


                            string   htl      = this.ToolWebSite.GetHtmlByUrl(saveUrl);
                            Parser   parser   = new Parser(new Lexer(htl));
                            NodeList nodelist = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "right_bg")));
                            if (nodelist != null && nodelist.Count > 0)
                            {
                                HtmlTxt = nodelist.AsHtml();
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcBH\"></span>", "<span id=\"gcBH\">" + code + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcName\"></span>", "<span id=\"gcBH\">" + gc["gcName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"bdName\"></span>", "<span id=\"bdName\">" + prjName + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmBH\"></span>", "<span id=\"xmBH\">" + xm["xm_BH"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmName\"></span>", "<span id=\"xmName\">" + xm["xm_Name"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    long zbgsStartTime = Convert.ToInt64(dtlJsons["zbgsStartTime"]);
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbgsStartTime\"></span>", "<span id=\"zbgsStartTime\">" + ToolHtml.GetDateTimeByLong(zbgsStartTime) + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbRName\"></span>", "<span id=\"zbRName\">" + gc["zbRName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbdlJG\"></span>", "<span id=\"zbdlJG\">" + gc["creatorName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"tbrName\"></span>", "<span id=\"tbrName\">" + dtlJsons["tbrName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoJE\"></span>", "<span id=\"zhongBiaoJE\">" + bidMoney + "万元</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoGQ\"></span>", "<span id=\"zhongBiaoGQ\">" + dtlJsons["zhongBiaoGQ"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"xiangMuJiLi\"></span>", "<span id=\"xiangMuJiLi\">" + prjMgr + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeDengJi\"></span>", "<span id=\"ziGeDengJi\">" + dtlJsons["ziGeDengJi"] + "</span>");
                                }
                                catch { }

                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeZhengShu\"></span>", "<span id=\"ziGeZhengShu\">" + dtlJsons["ziGeZhengShu"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    string zanding = string.IsNullOrWhiteSpace(Convert.ToString(dtlJsons["isZanDingJinE"])) ? "否" : "是";
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"isZanDingJinE\"></span>", "<span id=\"isZanDingJinE\">" + zanding + "</span>");
                                }
                                catch { }
                            }
                            try
                            {
                                string fileUrl  = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + dtlJsons["ztbFileGroupGuid"];
                                string fileJson = this.ToolWebSite.GetHtmlByUrl(fileUrl);
                                Dictionary <string, object> fileDic = (Dictionary <string, object>)serializer.DeserializeObject(fileJson);
                                object[] objFile = fileDic["rows"] as object[];

                                foreach (object file in objFile)
                                {
                                    Dictionary <string, object> attach = file as Dictionary <string, object>;
                                    listAttachs.Add(attach);
                                }
                            }
                            catch { }
                        }
                    }
                    catch { continue; }
                    bidCtx = HtmlTxt.Replace("<br />", "\r\n").ToCtxString();

                    if (!isJson)
                    {
                        buildUnit = bidCtx.GetBuildRegex();
                        bidUnit   = bidCtx.GetBidRegex();
                        bidMoney  = bidCtx.GetMoneyRegex();
                        prjMgr    = bidCtx.GetMgrRegex();

                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = bidCtx.GetRegex("项目负责");
                        }
                    }
                    msgType  = "深圳市建设工程交易中心龙岗分中心";
                    specType = "建设工程";
                    bidType  = ToolHtml.GetInviteTypes(prjName);
                    prjName  = ToolDb.GetPrjName(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, saveUrl, prjMgr, HtmlTxt);

                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }

                    sqlCount++;
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        if (!isJson)
                        {
                            Parser   parser   = new Parser(new Lexer(HtmlTxt));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int f = 0; f < fileNode.Count; f++)
                                {
                                    ATag tag = fileNode[f] as ATag;
                                    if (tag.IsAtagAttach() || tag.Link.ToLower().Contains("downloadfile"))
                                    {
                                        try
                                        {
                                            BaseAttach attach = null;
                                            string     link   = string.Empty;
                                            if (tag.Link.ToLower().Contains("http"))
                                            {
                                                link = tag.Link;
                                                if (link.StartsWith("\\"))
                                                {
                                                    link = link.Substring(link.IndexOf("\\"), link.Length - link.IndexOf("\\"));
                                                }
                                                if (link.EndsWith("//"))
                                                {
                                                    link = link.Remove(link.LastIndexOf("//"));
                                                }
                                                link = link.GetReplace("\\", "");
                                            }
                                            else
                                            {
                                                link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                                            }
                                            attach = ToolHtml.GetBaseAttachByUrl(link, tag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                            }
                                        }
                                        catch { continue; }
                                    }
                                }
                            }
                        }
                        else if (listAttachs.Count > 0)
                        {
                            foreach (Dictionary <string, object> attach in listAttachs)
                            {
                                BaseAttach attachBase = null;
                                try
                                {
                                    string attachName = Convert.ToString(attach["attachName"]);
                                    string attachId   = Convert.ToString(attach["attachGuid"]);
                                    string link       = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachId;

                                    attachBase = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                    if (attachBase != null)
                                    {
                                        ToolDb.SaveEntity(attachBase, "SourceID,AttachServerPath");
                                    }
                                }
                                catch { }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Пример #16
0
        /// <summary>
        /// Crawls the notice list of every district site configured below (sub-domains of szzfcg.cn),
        /// page by page, fetches each notice's detail page and stores it as a <see cref="NotifyInfo"/>
        /// through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling of a district stops as soon as the number of records generated
        /// for that district reaches <c>MaxCount</c> (the record that hits the limit is not saved);
        /// the next district is then processed.
        /// </param>
        /// <returns>
        /// The list created at the start of the method. Nothing in this method adds to it, so it is
        /// returned empty; records are persisted as a side effect instead.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<NotifyInfo>();

            // District name -> URL of the first page of that district's notice list.
            Dictionary<string, string> dic = new Dictionary<string, string>();

            // The entry below was already commented out; kept for reference.
            //dic.Add("盐田区", "http://yt.szzfcg.cn/portal/topicView.do?method=view&id=50074439");
            dic.Add("龙华新区", "http://lhxq.szzfcg.cn/portal/topicView.do?method=view&id=110074439");
            dic.Add("大鹏新区", "http://dp.szzfcg.cn/portal/topicView.do?method=view&id=100074439");
            dic.Add("坪山新区", "http://ps.szzfcg.cn/portal/topicView.do?method=view&id=90074439");
            dic.Add("龙岗区", "http://lg.szzfcg.cn/portal/topicView.do?method=view&id=70074439");
            dic.Add("光明新区", "http://gm.szzfcg.cn/portal/topicView.do?method=view&id=10170626");
            dic.Add("福田区", "http://ft.szzfcg.cn/portal/topicView.do?method=view&id=30074439");
            dic.Add("罗湖区", "http://lh.szzfcg.cn/portal/topicView.do?method=view&id=20074439");
            dic.Add("南山区", "http://ns.szzfcg.cn/portal/topicView.do?method=view&id=40074439");

            // District name -> sub-domain of szzfcg.cn, used to build absolute detail and image URLs.
            Dictionary<string, string> dicCity = new Dictionary<string, string>();

            //dicCity.Add("盐田区", "yt");
            dicCity.Add("龙华新区", "lhxq");
            dicCity.Add("大鹏新区", "dp");
            dicCity.Add("坪山新区", "ps");
            dicCity.Add("龙岗区", "lg");
            dicCity.Add("光明新区", "gm");
            dicCity.Add("福田区", "ft");
            dicCity.Add("罗湖区", "lh");
            dicCity.Add("南山区", "ns");

            foreach (string key in dic.Keys)
            {
                string listUrl  = dic[key];
                string siteRoot = "http://" + dicCity[key] + ".szzfcg.cn";

                // Both counters are per district: the MaxCount limit applies to each district separately.
                int  pageInt = 1, sqlCount = 0;
                bool limitReached = false;

                string html = string.Empty;
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(listUrl);
                }
                catch { continue; } // first page unavailable: skip this district

                // The last <option> of the "__ec_pages" <select> carries the total page count.
                Parser   parser   = new Parser(new Lexer(html));
                NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "__ec_pages")));
                if (pageNode != null && pageNode.Count > 0)
                {
                    SelectTag select = pageNode[0] as SelectTag;
                    try
                    {
                        pageInt = int.Parse(select.OptionTags[select.OptionTags.Length - 1].Value);
                    }
                    catch { } // best effort: fall back to a single page when the pager cannot be read
                }

                for (int i = 1; i <= pageInt && !limitReached; i++)
                {
                    if (i > 1)
                    {
                        // Everything after "id=" in the list URL is the topic id expected by the paging form.
                        string id = listUrl.Substring(listUrl.IndexOf("id=", StringComparison.Ordinal) + 3);
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "ec_i",
                            "topicChrList_20070702_crd",
                            "topicChrList_20070702_f_a",
                            "topicChrList_20070702_p",
                            "topicChrList_20070702_s_name",
                            "topicChrList_20070702_s_topName",
                            "id",
                            "method",
                            "__ec_pages",
                            "topicChrList_20070702_rd",
                            "topicChrList_20070702_f_name",
                            "topicChrList_20070702_f_topName",
                            "topicChrList_20070702_f_ldate",
                        }, new string[] {
                            "topicChrList_20070702",
                            "20",
                            "",
                            i.ToString(),
                            "",
                            "",
                            id,
                            "view",
                            i.ToString(),
                            "20",
                            "",
                            "",
                            ""
                        });
                        try
                        {
                            html = this.ToolWebSite.GetHtmlByUrl(listUrl, nvc);
                        }
                        catch { continue; } // a failed page must not abort the remaining pages/districts
                    }

                    parser = new Parser(new Lexer(html));
                    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "topicChrList_20070702_table")));

                    // Short-circuit '&&' so that a null result is never dereferenced.
                    if (listNode == null || listNode.Count <= 0)
                    {
                        continue;
                    }
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }

                    // Row iteration starts at index 3; earlier rows are not treated as data rows.
                    for (int j = 3; j < table.RowCount; j++)
                    {
                        TableRow tr          = table.Rows[j];
                        string   headName    = tr.Columns[1].ToNodePlainString();
                        string   releaseTime = tr.Columns[3].ToPlainTextString();
                        string   infoSource  = string.Empty;
                        string   infoType    = "通知公告";
                        string   msgType     = "深圳市" + key + "政府采购中心";
                        string   infoUrl     = siteRoot + tr.Columns[1].GetATagHref();

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                        }
                        catch { continue; } // detail page unavailable: skip this notice

                        // Preferred content: the first centre-aligned table. Fallback: the whole <body>.
                        string ctxHtml;
                        bool   usedBodyFallback = false;
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            ctxHtml = dtlNode[0].ToHtml();
                        }
                        else
                        {
                            parser.Reset();
                            NodeList bodyNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                            if (bodyNode == null || bodyNode.Count <= 0)
                            {
                                continue;
                            }
                            ctxHtml          = bodyNode.AsHtml();
                            usedBodyFallback = true;
                        }

                        string     infoCtx = ctxHtml.ToCtxString();
                        NotifyInfo info    = ToolDb.GenNotifyInfo(headName, releaseTime, infoSource, msgType, infoUrl, ctxHtml, "广东省", "深圳政府采购", key, infoCtx, infoType);

                        sqlCount++;
                        if (!crawlAll && sqlCount >= this.MaxCount)
                        {
                            // Same handling for both content branches: stop this district only.
                            limitReached = true;
                            break;
                        }

                        bool saved = ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate);

                        // Images are only collected for notices that came from the <body> fallback,
                        // exactly as before.
                        if (!saved || !usedBodyFallback)
                        {
                            continue;
                        }

                        parser.Reset();
                        NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                        if (imgList == null || imgList.Count <= 0)
                        {
                            continue;
                        }
                        for (int m = 0; m < imgList.Count; m++)
                        {
                            try
                            {
                                ImageTag img = imgList[m] as ImageTag;
                                string   src = img.GetAttribute("src");

                                // Sources that do not contain "http" are resolved against the district site root.
                                string     absoluteSrc = src.Contains("http") ? src : siteRoot + src;
                                BaseAttach obj         = ToolHtml.GetBaseAttach(absoluteSrc, headName, info.Id);
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                            catch { } // best effort: a broken image must not stop the crawl
                        }
                    }
                }
            }
            return(list);
        }
Пример #17
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, loads each notice's detail page from
        /// www.szpark.com.cn and stores it as a <see cref="NotifyInfo"/>. For every newly saved notice
        /// the non-GIF images and the attachment links found in its content are saved as
        /// <see cref="BaseAttach"/> records.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of generated records reaches
        /// <c>MaxCount</c> (the record that hits the limit is not saved).
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted through <c>ToolDb.SaveEntity</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.szpark.com.cn";

            int    totalPages = 1, handled = 0;
            string listHtml   = string.Empty;

            // Load the first list page; without it there is nothing to crawl.
            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return(null);
            }

            // Determine the page count from the text of the pager link.
            Parser   parser    = new Parser(new Lexer(listHtml));
            NodeList pagerLink = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "PageDataList__ctl7_LinkButton1")));
            if (pagerLink != null && pagerLink.Count > 0)
            {
                try
                {
                    totalPages = Convert.ToInt32(pagerLink.AsString().GetRegexBegEnd("共", "页"));
                }
                catch { totalPages = 1; }
            }

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    // Later pages are requested by posting the ASP.NET form back with the pager target.
                    try
                    {
                        string   viewState  = this.ToolWebSite.GetAspNetViewState(listHtml);
                        string[] fieldNames = new string[] {
                            "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "head1:username",
                            "head1:Password", "head1:rbLoginType", "Tb_keyword", "ddlNewsType", "ddlistaddnewsdate"
                        };
                        string[] fieldValues = new string[] {
                            "PageDataList$_ctl" + (page + 1).ToString() + "$LinkButton1", "", viewState, "", "", "unit", "", "20", ""
                        };
                        NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(listHtml));
                NodeList listTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", " tb_list")));
                if (listTables == null || listTables.Count <= 0)
                {
                    continue;
                }

                TableTag listTable = listTables[0] as TableTag;
                for (int rowIndex = 0; rowIndex < listTable.RowCount; rowIndex++)
                {
                    TableRow row         = listTable.Rows[rowIndex];
                    string   infoType    = "通知公告";
                    string   releaseTime = row.Columns[2].ToPlainTextString().GetDateRegex();
                    string   headName    = row.Columns[1].ToNodePlainString();
                    string   infoUrl     = siteRoot + row.Columns[1].GetATagHref();
                    string   infoSource  = string.Empty;

                    // A failed detail request leaves the HTML empty, so the content lookup below finds nothing.
                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = ToolHtml.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList contentNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "newsinfo")));
                    if (contentNodes == null || contentNodes.Count <= 0)
                    {
                        continue;
                    }

                    string ctxHtml = contentNodes.AsHtml()
                                     .Replace("<br/>", "\r\n")
                                     .Replace("<BR/>", "");

                    // Plain-text version: strip spaces, entities, tabs and any remaining tags,
                    // then collapse repeated blank lines.
                    string infoCtx = ctxHtml.ToCtxString()
                                     .Replace(" ", "")
                                     .Replace("&nbsp;", "")
                                     .Replace("\t\t", "\t")
                                     .Replace("\t\t", "\t");
                    infoCtx = Regex.Replace(infoCtx, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase)
                              .Replace(" ", "")
                              .Replace("\t", "")
                              .Replace("\r\n\r\n", "\r\n")
                              .Replace("\r\n\r\n", "\r\n")
                              .Replace("\r\n\r\n", "\r\n");

                    string msgType = MsgTypeCosnt.ShenZhenFJYLMsgType;
                    infoSource = infoSource.Replace("&nbsp;", "");

                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoSource, msgType, infoUrl, ctxHtml, "广东省", "深圳市工程", string.Empty, infoCtx, infoType);
                    handled++;
                    if (!crawlAll && handled >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Images embedded in the notice content (GIFs are skipped).
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList images = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (images != null && images.Count > 0)
                    {
                        for (int imgIndex = 0; imgIndex < images.Count; imgIndex++)
                        {
                            try
                            {
                                ImageTag image = images[imgIndex] as ImageTag;
                                string   src   = image.GetAttribute("src");
                                if (!src.ToLower().Contains(".gif"))
                                {
                                    string imageUrl = src.Contains("http")
                                        ? src
                                        : siteRoot + src.Replace("../", "/").Replace("./", "/");
                                    BaseAttach imageAttach = ToolHtml.GetBaseAttach(imageUrl, headName, info.Id);
                                    if (imageAttach != null)
                                    {
                                        ToolDb.SaveEntity(imageAttach, string.Empty);
                                    }
                                }
                            }
                            catch { }
                        }
                    }

                    // Links in the notice content that IsAtagAttach() recognises as attachments.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int linkIndex = 0; linkIndex < anchors.Count; linkIndex++)
                        {
                            ATag anchor = anchors[linkIndex] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }
                            try
                            {
                                string href    = anchor.GetATagHref();
                                string fileUrl = href.Contains("http")
                                    ? href
                                    : siteRoot + href.Replace("../", "/").Replace("./", "/");
                                BaseAttach fileAttach = ToolHtml.GetBaseAttach(fileUrl, anchor.LinkText, info.Id);
                                if (fileAttach != null)
                                {
                                    ToolDb.SaveEntity(fileAttach, string.Empty);
                                }
                            }
                            catch { }
                        }
                    }
                }
            }
            return(null);
        }
Пример #18
0
        /// <summary>
        /// Crawls the paged article list at <c>SiteUrl</c>, loads each article's detail page from
        /// www.szmea.net and stores it as a <see cref="NotifyInfo"/>. For every newly saved article
        /// the non-GIF images and the attachment links found in its content are saved as
        /// <see cref="BaseAttach"/> records.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of generated records reaches
        /// <c>MaxCount</c> (the record that hits the limit is not saved).
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted through <c>ToolDb.SaveEntity</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.szmea.net";

            int    totalPages = 1, handled = 0;
            string listHtml   = string.Empty;

            // Load the first list page; without it there is nothing to crawl.
            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                return(null);
            }

            // Page count = number of <option> elements under the page selector.
            Parser     parser         = new Parser(new Lexer(listHtml));
            NodeFilter pageSelector   = new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "dnn_ctr467_ArticleList_cboPages"));
            NodeList   pageOptions    = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(pageSelector, true), new TagNameFilter("option")));
            if (pageOptions != null && pageOptions.Count > 0)
            {
                try
                {
                    totalPages = pageOptions.Count;
                }
                catch { totalPages = 1; }
            }

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    // Later pages are requested by posting the form back with the zero-based page index.
                    try
                    {
                        string   viewState  = this.ToolWebSite.GetAspNetViewState(listHtml);
                        string[] fieldNames = new string[] {
                            "__EVENTARGUMENT", "dnn:ctr467:ArticleList:cboPages",
                            "ScrollTop", "__dnnVariable", "__VIEWSTATE"
                        };
                        string[] fieldValues = new string[] { "", (page - 1).ToString(), "", "", viewState };
                        NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(listHtml));
                NodeFilter listPanel  = new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dnn_ctr467_ArticleList_PanelA"));
                NodeList   listTables = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(listPanel, true), new TagNameFilter("table")));
                if (listTables == null || listTables.Count <= 0)
                {
                    continue;
                }

                TableTag listTable = listTables[0] as TableTag;
                for (int rowIndex = 0; rowIndex < listTable.RowCount; rowIndex++)
                {
                    TableRow row      = listTable.Rows[rowIndex];
                    string   infoType = "通知公告";

                    // The list date is matched with a two-digit-year pattern, so "20" is prefixed.
                    string releaseTime = "20" + row.Columns[2].ToPlainTextString().GetDateRegex("yy-MM-dd");
                    string headName    = row.Columns[1].ToNodePlainString();
                    string infoUrl     = siteRoot + row.Columns[1].GetATagHref();
                    string infoSource  = string.Empty;

                    // A failed detail request leaves the HTML empty, so the content lookup below finds nothing.
                    // (A two-argument call without SiteUrl was left commented out next to this one.)
                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = ToolHtml.GetHtmlByUrl(SiteUrl, infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList contentNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "dnn_ctr391_ArticleShow_lblContent")));
                    if (contentNodes == null || contentNodes.Count <= 0)
                    {
                        continue;
                    }

                    string ctxHtml = contentNodes.AsHtml()
                                     .Replace("<br/>", "\r\n")
                                     .Replace("<BR/>", "");

                    // Plain-text version: strip spaces, entities, tabs and any remaining tags,
                    // then collapse repeated blank lines.
                    string infoCtx = ctxHtml.ToCtxString()
                                     .Replace(" ", "")
                                     .Replace("&nbsp;", "")
                                     .Replace("\t\t", "\t")
                                     .Replace("\t\t", "\t");
                    infoCtx = Regex.Replace(infoCtx, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase)
                              .Replace(" ", "")
                              .Replace("\t", "")
                              .Replace("\r\n\r\n", "\r\n")
                              .Replace("\r\n\r\n", "\r\n")
                              .Replace("\r\n\r\n", "\r\n");

                    string msgType = MsgTypeCosnt.ShenZhenJLGCMsgType;
                    infoSource = infoSource.Replace("&nbsp;", "");

                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoSource, msgType, infoUrl, ctxHtml, "广东省", "深圳市工程", string.Empty, infoCtx, infoType);
                    handled++;
                    if (!crawlAll && handled >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Images embedded in the article content (GIFs are skipped).
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList images = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (images != null && images.Count > 0)
                    {
                        for (int imgIndex = 0; imgIndex < images.Count; imgIndex++)
                        {
                            try
                            {
                                ImageTag image = images[imgIndex] as ImageTag;
                                string   src   = image.GetAttribute("src");
                                if (!src.ToLower().Contains(".gif"))
                                {
                                    string imageUrl = src.Contains("http")
                                        ? src
                                        : siteRoot + src.Replace("../", "/").Replace("./", "/");
                                    BaseAttach imageAttach = ToolHtml.GetBaseAttach(imageUrl, headName, info.Id);
                                    if (imageAttach != null)
                                    {
                                        ToolDb.SaveEntity(imageAttach, string.Empty);
                                    }
                                }
                            }
                            catch { }
                        }
                    }

                    // Links in the article content that IsAtagAttach() recognises as attachments.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int linkIndex = 0; linkIndex < anchors.Count; linkIndex++)
                        {
                            ATag anchor = anchors[linkIndex] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }
                            try
                            {
                                string href    = anchor.GetATagHref();
                                string fileUrl = href.Contains("http")
                                    ? href
                                    : siteRoot + href.Replace("../", "/").Replace("./", "/");
                                BaseAttach fileAttach = ToolHtml.GetBaseAttach(fileUrl, anchor.LinkText, info.Id);
                                if (fileAttach != null)
                                {
                                    ToolDb.SaveEntity(fileAttach, string.Empty);
                                }
                            }
                            catch { }
                        }
                    }
                }
            }
            return(null);
        }
Пример #19
0
        /// <summary>
        /// Crawls the paged grid at <c>SiteUrl</c> on www.szjsjy.com.cn. For every grid row it fetches the
        /// detail page, builds a <c>BidSituation</c>, saves it, and downloads the attachments linked from
        /// the detail content.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the number of built records reaches <c>MaxCount</c>
        /// (the record that hits the limit is not saved).
        /// </param>
        /// <returns>
        /// The list created at the top of the method. Nothing is added to it here; records are persisted
        /// directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List<BidSituation>();
            string html = string.Empty;
            string cookiestr = string.Empty;
            string viewState = string.Empty;
            int    pageInt = 1, sqlCount = 0;
            string eventValidation = string.Empty;

            try
            {
                // Load the home page first: its hidden ASP.NET fields are needed for the form post below.
                html            = this.ToolWebSite.GetHtmlByUrl("http://www.szjsjy.com.cn/HomePage.aspx", Encoding.UTF8, ref cookiestr);
                viewState       = this.ToolWebSite.GetAspNetViewState(html);
                eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                NameValueCollection n = this.ToolWebSite.GetNameValueCollection(
                    new string[] {
                    "__VIEWSTATE",
                    "__VIEWSTATEENCRYPTED",
                    "__EVENTVALIDATION",
                    "TextBox1",
                    "ddl",
                    "DDL_Govt",
                    "DDL_Trade",
                    "txtText",
                    "hdnSN",
                    "ImageButton2.x",
                    "ImageButton2.y"
                },
                    new string[] {
                    viewState,
                    "",
                    eventValidation,
                    "请输入关键字", "0", "0", "0",
                    "CN=年度施工投标人7,OU=1007,L=深圳市,ST=广东省,C=CN",
                    "241EDFC1BA276AA7", "19", "13"
                }
                    );
                string tempCookie = string.Empty;
                html = this.ToolWebSite.GetHtmlByUrl("http://www.szjsjy.com.cn/HomePage.aspx", n, Encoding.UTF8, ref tempCookie);

                // Remove cookie attributes and spaces before handing the cookie to the following requests.
                cookiestr = tempCookie.Replace("path=/;", "").Replace("HttpOnly,", "").Replace("HttpOnly", "").Replace(" ", "");
                html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));

            if (pageNode != null && pageNode.Count > 0)
            {
                TableTag table = pageNode[0] as TableTag;
                try
                {
                    // The page count is read from the last row of the grid; on any failure one page is assumed.
                    string temp = table.Rows[table.RowCount - 1].ToNodePlainString().GetRegexBegEnd(",共", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting back to the grid with the hidden fields of the previous page.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "ctl00$Content$drpSearchType",
                        "ctl00$Content$txtQymc",
                        "ctl00$Content$hdnOperate",
                        "ctl00$hdnPageCount"
                    },
                                                                                      new string[] {
                        "ctl00$Content$GridView1",
                        "Page$" + i,
                        viewState,
                        "",
                        eventValidation,
                        "0", "", "", pageInt.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag grid = listNode[0] as TableTag;

                // Row 0 and the last row (the one the page count was read from) are not data rows.
                for (int j = 1; j < grid.RowCount - 1; j++)
                {
                    string code = string.Empty, prjName = string.Empty, PublicityEndDate = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty, beginDate = string.Empty;

                    TableRow tr = grid.Rows[j];
                    code             = tr.Columns[1].ToNodePlainString();
                    prjName          = tr.Columns[2].ToNodePlainString();
                    PublicityEndDate = tr.Columns[3].ToPlainTextString();
                    beginDate        = DateTime.Now.ToString();
                    InfoUrl          = "http://www.szjsjy.com.cn/BusinessInfo/" + tr.Columns[4].GetATagHref();

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8, ref cookiestr).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ContentContainer")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    ctx     = HtmlTxt.ToCtxString();
                    msgType = "深圳市建设工程交易中心";
                    BidSituation info = ToolDb.GetBidSituation("广东省", "深圳市工程", "", code, prjName, PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);
                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return list;
                    }

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int d = 0; d < aNode.Count; d++)
                    {
                        // Fix: inspect the d-th anchor. The previous code always read aNode[0], so only the
                        // first link was ever considered (and was processed once per anchor on the page).
                        ATag aTag = aNode[d] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }

                        string     url    = "http://www.szjsjy.com.cn/" + aTag.Link.Replace("../", "");
                        BaseAttach attach = null;
                        try
                        {
                            attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                            if (attach == null)
                            {
                                // One retry when the first download attempt yields nothing.
                                attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                            }
                        }
                        catch { }

                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, string.Empty);
                        }
                    }
                }
            }
            return list;
        }
Пример #20
0
        /// <summary>
        /// Crawls the paged grid at <c>SiteUrl</c> on jyzx.cb.gov.cn. It posts the login form of the index
        /// page, walks every result page, and for each row fetches the detail page, saves a
        /// <c>BidSituation</c> and downloads the attachments linked from the detail content.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the number of built records reaches <c>MaxCount</c>
        /// (the record that hits the limit is not saved).
        /// </param>
        /// <returns>
        /// The list created at the top of the method. Nothing is added to it here; records are persisted
        /// directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List<BidSituation>();
            string html = string.Empty;
            string cookiestr = string.Empty;
            string viewState = string.Empty;
            int    pageInt = 1, sqlCount = 0;
            string eventValidation = string.Empty;
            string tempCookie      = string.Empty;

            try
            {
                // Load the index page first: its view state is required by the login post below.
                html            = this.ToolWebSite.GetHtmlByUrl("http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/Index.aspx", Encoding.UTF8, ref cookiestr);
                viewState       = this.ToolWebSite.GetAspNetViewState(html);
                eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);

                NameValueCollection n = this.ToolWebSite.GetNameValueCollection(
                    new string[] {
                    "ctl00$ScriptManager1",
                    "__EVENTTARGET",
                    "__EVENTARGUMENT",
                    "__VIEWSTATE",
                    "ctl00$cph_context$Login1$hfCertTitle",
                    "ctl00$cph_context$DropDownList1",
                    "ctl00$cph_context$DropDownList2",
                    "select3",
                    "textfield",
                    "ctl00$cph_context$Login1$btnLogin.x",
                    "ctl00$cph_context$Login1$btnLogin.y"
                },
                    new string[] {
                    "ctl00$cph_context$Login1$upLogin|ctl00$cph_context$Login1$btnLogin",
                    "", "",
                    viewState,
                    "CN=年度施工投标人7,OU=1007,L=深圳市,ST=广东省,C=CN",
                    "",
                    "",
                    "=全文检索=",
                    "输入查询内容",
                    "22",
                    "8"
                }
                    );

                // NOTE(review): the cookie from the first GET (cookiestr) is not reused; every later
                // request carries tempCookie instead. Kept as-is; confirm this is intended.
                html = this.ToolWebSite.GetHtmlByUrl("http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/Index.aspx", n, Encoding.UTF8, ref tempCookie);
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref tempCookie);
            }
            catch
            {
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table3_bottom")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // On any failure to read the page count a single page is assumed.
                    string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd(",共", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the "next" button with the previous page's view state.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "ctl00$ScriptManager1",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "ctl00$cph_context$KBQKGSList$ddlSearchType",
                        "ctl00$cph_context$KBQKGSList$txtQymc",
                        "ctl00$cph_context$KBQKGSList$GridViewPaging1$txtGridViewPagingForwardTo",
                        "__VIEWSTATEENCRYPTED",
                        "ctl00$cph_context$KBQKGSList$GridViewPaging1$btnNext.x",
                        "ctl00$cph_context$KBQKGSList$GridViewPaging1$btnNext.y"
                    }, new string[] {
                        "ctl00$cph_context$KBQKGSList$UpdatePanel2|ctl00$cph_context$KBQKGSList$GridViewPaging1$btnNext",
                        "", "",
                        viewState,
                        "A.Gcbh",
                        "",
                        (i - 1).ToString(),
                        "", "5", "6"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref tempCookie);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_KBQKGSList_GridView1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;

                // Row 0 is skipped; every following row is treated as a data row.
                for (int j = 1; j < table.RowCount; j++)
                {
                    string code = string.Empty, prjName = string.Empty, PublicityEndDate = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty, beginDate = string.Empty;

                    TableRow tr = table.Rows[j];
                    code             = tr.Columns[1].ToNodePlainString();
                    prjName          = tr.Columns[2].ToNodePlainString();
                    PublicityEndDate = tr.Columns[3].ToPlainTextString();
                    beginDate        = DateTime.Now.ToString();
                    InfoUrl          = "http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/" + tr.Columns[4].GetATagHref();

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8, ref tempCookie).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "listtable")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    ctx     = HtmlTxt.ToCtxString();
                    msgType = "深圳市建设工程交易中心龙岗分中心";
                    BidSituation info = ToolDb.GetBidSituation("广东省", "深圳龙岗区工程", "龙岗区", code, prjName, PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);
                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return list;
                    }

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        continue;
                    }

                    if (this.ExistsUpdate)
                    {
                        // When updates are enabled, attachments stored for this record are deleted before
                        // being downloaded again. InfoUrl comes from scraped markup, so single quotes are
                        // doubled before it is placed inside the SQL string literal.
                        string safeInfoUrl = (info.InfoUrl ?? string.Empty).Replace("'", "''");
                        object id = ToolDb.ExecuteScalar(string.Format("select Id from BidSituation where InfoUrl='{0}'", safeInfoUrl));
                        if (id != null)
                        {
                            string sql = string.Format("delete from BaseAttach where SourceID='{0}'", id);
                            ToolDb.ExecuteSql(sql);
                        }
                    }

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int d = 0; d < aNode.Count; d++)
                    {
                        // Fix: inspect the d-th anchor. The previous code always read aNode[0], so only the
                        // first link was ever considered (and was processed once per anchor on the page).
                        ATag aTag = aNode[d] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }

                        string     url    = "http://jyzx.cb.gov.cn/LGjyzxWeb/" + aTag.Link.Replace("../", "");
                        BaseAttach attach = null;
                        try
                        {
                            attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                            if (attach == null)
                            {
                                // One retry when the first download attempt yields nothing.
                                attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                            }
                        }
                        catch { }

                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, string.Empty);
                        }
                    }
                }
            }
            return list;
        }
Пример #21
0
        /// <summary>
        /// Crawls a JSON listing (requested from <c>SiteUrl + MaxCount</c>) of the szjsjy.com.cn:8001
        /// service. For each entry in <c>rows</c> it loads the detail content, saves a
        /// <c>BidSituation</c> and downloads the attachments that belong to it.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the number of built records reaches <c>MaxCount</c>
        /// (the record that hits the limit is not saved).
        /// </param>
        /// <returns>
        /// The list created at the top of the method. Nothing is added to it here; records are persisted
        /// directly through <c>ToolDb.SaveEntity</c>. The same (empty) list is returned when the listing
        /// cannot be fetched or is not the expected JSON object.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List<BidSituation>();
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return list; }

            if (string.IsNullOrEmpty(html))
            {
                return list;
            }

            // Keep only the outermost {...}; without both braces Substring would throw.
            int startIndex = html.IndexOf('{');
            int endIndex   = html.LastIndexOf('}');
            if (startIndex < 0 || endIndex < startIndex)
            {
                return list;
            }
            html = html.Substring(startIndex, (endIndex + 1) - startIndex);

            object[] objvalues = null;
            try
            {
                JavaScriptSerializer        serializer  = new JavaScriptSerializer();
                Dictionary <string, object> smsTypeJson = serializer.DeserializeObject(html) as Dictionary <string, object>;
                object rowsValue;
                if (smsTypeJson != null && smsTypeJson.TryGetValue("rows", out rowsValue))
                {
                    objvalues = rowsValue as object[];
                }
            }
            catch { return list; }

            if (objvalues == null)
            {
                return list;
            }

            foreach (object objValue in objvalues)
            {
                Dictionary <string, object> dic = (Dictionary <string, object>)objValue;
                string code = string.Empty, prjName = string.Empty, PublicityEndDate = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty, beginDate = string.Empty;
                code      = Convert.ToString(dic["bdBH"]);
                prjName   = Convert.ToString(dic["bdName"]);
                beginDate = Convert.ToString(dic["faBuTime2"]);
                string idt = Convert.ToString(dic["bdGuid"]);
                InfoUrl = Convert.ToString(dic["detailUrl"]);
                string attachJson = string.Empty;
                try
                {
                    string urll = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=5&id=" + idt;
                    HtmlTxt = this.ToolWebSite.GetHtmlByUrl(urll).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        // The first endpoint returned nothing: fall back to the kbJiLu view page and
                        // its JSON detail endpoint.
                        string kdGuid = Convert.ToString(dic["kbJiLuGuid"]);
                        InfoUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=" + kdGuid;
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl);
                        string url = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/querykbJiLuDetail.do?ggGuid=&bdGuid=&kbJiLuGuid=" + kdGuid;
                        attachJson = this.ToolWebSite.GetHtmlByUrl(url);
                    }
                }
                catch { continue; }

                string gcLeixing = string.Empty, jywTime = string.Empty, surl = string.Empty,
                       attachId = string.Empty, attachFileGroupGuid = string.Empty;

                if (!string.IsNullOrWhiteSpace(attachJson))
                {
                    // A malformed detail response or a failed request skips this entry instead of
                    // aborting the whole crawl, matching the handling of the requests above.
                    try
                    {
                        JavaScriptSerializer        newSerializer = new JavaScriptSerializer();
                        Dictionary <string, object> newTypeJson   = (Dictionary <string, object>)newSerializer.DeserializeObject(attachJson);
                        Dictionary <string, object> kdInfo        = (Dictionary <string, object>)newTypeJson["kbJiLu"];

                        try
                        {
                            attachId            = Convert.ToString(kdInfo["kbJiLuGuid"]);
                            attachFileGroupGuid = Convert.ToString(kdInfo["attachFileGroupGuid"]);
                        }
                        catch { }
                        gcLeixing = Convert.ToString(kdInfo["gcLeiXing"]);
                        jywTime   = Convert.ToString(kdInfo["jywFaBuEndTime"]);

                        // NOTE(review): the query parameter is named kbJiLuGuid but receives
                        // attachFileGroupGuid, while attachId (the kbJiLuGuid value) is never used.
                        // Left unchanged; confirm which identifier the endpoint expects.
                        surl       = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=" + attachFileGroupGuid;
                        attachJson = this.ToolWebSite.GetHtmlByUrl(surl);

                        HtmlTxt = attachJson;
                        Parser   parserNew = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = parserNew.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "de_tab1")));
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            // Fill the empty cells of the detail table with the values known from the JSON.
                            HtmlTxt = tableNode.AsHtml();
                            HtmlTxt = HtmlTxt.GetReplace("<td id=\"bdBH\">&nbsp;</td>", "<td id=\"bdBH\">&nbsp;" + code + "</td>");
                            HtmlTxt = HtmlTxt.GetReplace("<td id=\"bdName\">&nbsp;</td>", "<td  id=\"bdName\">&nbsp;" + prjName + "</td>");
                            HtmlTxt = HtmlTxt.GetReplace("<td id=\"gcLeiXing\">&nbsp;</td>", "<td id=\"gcLeiXing\">&nbsp;" + gcLeixing + "</td>");
                            HtmlTxt = HtmlTxt.GetReplace("<td id=\"jieZhiTime\">&nbsp;</td>", "<td id=\"jieZhiTime\">&nbsp;" + jywTime + "</td>");
                            ctx     = HtmlTxt.Replace("</tr>", "\r\n").ToCtxString();
                        }
                    }
                    catch { continue; }
                }

                // Fix: the row-separated text built above used to be overwritten unconditionally here.
                // The plain conversion is now only the fallback when no text has been produced yet.
                if (string.IsNullOrEmpty(ctx))
                {
                    ctx = HtmlTxt.ToCtxString();
                }

                msgType = "深圳市建设工程交易中心宝安分中心";
                BidSituation info = ToolDb.GetBidSituation("广东省", "深圳宝安区工程", "宝安区", code, prjName, PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);
                sqlCount++;
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return list;
                }
                if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                {
                    continue;
                }

                if (!string.IsNullOrWhiteSpace(attachFileGroupGuid))
                {
                    // Attachments are listed by a dedicated JSON endpoint keyed by the file-group id.
                    string moJson = string.Empty;
                    string sUrl   = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + attachFileGroupGuid;
                    try
                    {
                        moJson = this.ToolWebSite.GetHtmlByUrl(sUrl);
                    }
                    catch { }
                    if (string.IsNullOrWhiteSpace(moJson))
                    {
                        continue;
                    }

                    object[] objs = null;
                    try
                    {
                        JavaScriptSerializer        newSerializers = new JavaScriptSerializer();
                        Dictionary <string, object> mofo           = newSerializers.DeserializeObject(moJson) as Dictionary <string, object>;
                        object attachRows;
                        if (mofo != null && mofo.TryGetValue("rows", out attachRows))
                        {
                            objs = attachRows as object[];
                        }
                    }
                    catch { }
                    if (objs == null)
                    {
                        continue;
                    }

                    foreach (object objAttach in objs)
                    {
                        // A failing attachment is skipped, as in the anchor-based branch below.
                        try
                        {
                            Dictionary <string, object> attachs = (Dictionary <string, object>)objAttach;
                            string     attachguid = Convert.ToString(attachs["attachGuid"]);
                            string     attachName = Convert.ToString(attachs["attachName"]);
                            string     link       = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachguid;
                            BaseAttach attach     = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                            }
                        }
                        catch { continue; }
                    }
                }
                else
                {
                    // No file group: every anchor found in the stored HTML is tried as an attachment.
                    Parser   parser   = new Parser(new Lexer(HtmlTxt));
                    NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNode != null && fileNode.Count > 0)
                    {
                        for (int f = 0; f < fileNode.Count; f++)
                        {
                            ATag tag = fileNode[f] as ATag;

                            try
                            {
                                BaseAttach attach = null;
                                string     link   = string.Empty;
                                if (tag.Link.ToLower().Contains("http"))
                                {
                                    link = tag.Link;
                                    if (link.Contains("\\"))
                                    {
                                        link = link.Replace("\\", "");
                                    }
                                }
                                else
                                {
                                    link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                                }
                                attach = ToolHtml.GetBaseAttach(link, tag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");

                                if (attach != null)
                                {
                                    ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                }
                            }
                            catch { continue; }
                        }
                    }
                }
            }
            return list;
        }
Пример #22
0
        /// <summary>
        /// Crawls the notice listing at <c>SiteUrl</c> on www.yantian.gov.cn. For every table row it
        /// fetches the notice page, saves a <c>NotifyInfo</c>, and downloads the attachments linked from
        /// the notice content (or, when the content has no anchors at all, its images).
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the number of built records reaches <c>MaxCount</c>
        /// (the record that hits the limit is not saved).
        /// </param>
        /// <returns>
        /// The list created at the top of the method. Nothing is added to it here; records are persisted
        /// directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List<NotifyInfo>();
            int    pageInt = 1, sqlCount = 0;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return list; }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("select"));

            if (pageNode != null && pageNode.Count > 0)
            {
                SelectTag selTag = pageNode[0] as SelectTag;
                try
                {
                    // The text of the last option of the first <select> is taken as the page count;
                    // on any failure a single page is assumed.
                    string temp = selTag.OptionTags[selTag.OptionTags.Length - 1].OptionText;
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page i is requested as index_(i-1).shtml.
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.yantian.gov.cn/icatalog/qzf/08/tzgg/index_" + (i - 1).ToString() + ".shtml");
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;

                // Row 0 is skipped; every following row is treated as a notice row.
                for (int j = 1; j < table.RowCount; j++)
                {
                    string   headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty;
                    TableRow tr = table.Rows[j];

                    ATag aTag = tr.Columns[2].GetATag();
                    if (aTag == null)
                    {
                        // A row without a link has no notice to fetch.
                        continue;
                    }
                    headName    = aTag.GetAttribute("title");
                    releaseTime = tr.Columns[3].ToPlainTextString().GetDateRegex();
                    infoUrl     = "http://www.yantian.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    ctxHtml = dtlNode.AsHtml();
                    infoCtx = ctxHtml.ToCtxString();

                    msgType = "深圳市盐田区政府采购中心";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "深圳区及街道工程", "盐田区", infoCtx, "通知公告");
                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        // Fix: return the list like every other exit of this method instead of null.
                        return list;
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag fileATag = aNode[k].GetATag();
                            if (!fileATag.IsAtagAttach())
                            {
                                continue;
                            }

                            BaseAttach obj = null;
                            try
                            {
                                if (fileATag.Link.ToLower().Contains("http"))
                                {
                                    obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                                }
                                else
                                {
                                    obj = ToolHtml.GetBaseAttach("http://www.yantian.gov.cn/" + fileATag.Link, headName, info.Id);
                                }
                            }
                            catch { }
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                    }
                    else
                    {
                        // No anchors in the content: store its images instead.
                        parser.Reset();
                        NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                        if (imgNode != null && imgNode.Count > 0)
                        {
                            for (int k = 0; k < imgNode.Count; k++)
                            {
                                // Fix: read the k-th image. The previous code always read imgNode[0], so
                                // the first image was downloaded once per image and the others never.
                                ImageTag   img = imgNode[k] as ImageTag;
                                BaseAttach obj = null;
                                try
                                {
                                    if (img.ImageURL.ToLower().Contains("http"))
                                    {
                                        obj = ToolHtml.GetBaseAttach(img.ImageURL, headName, info.Id);
                                    }
                                    else
                                    {
                                        obj = ToolHtml.GetBaseAttach("http://www.yantian.gov.cn/" + img.ImageURL, headName, info.Id);
                                    }
                                }
                                catch { }
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                        }
                    }
                }
            }
            return list;
        }
Пример #23
0
        /// <summary>
        /// Walks the paged list under http://www.gdzbtb.gov.cn/pbbgbd/, builds a
        /// <c>BidProject</c> for every list entry from its detail page and saves it,
        /// together with the attachment linked next to the <c>#pbbg_shongti</c> script marker.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> entries have been processed.
        /// </param>
        /// <returns>
        /// The list created at the start of the method. Entities are written through
        /// <c>ToolDb</c> directly, so nothing is ever added to the list here.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new ArrayList();
            string html     = string.Empty;
            int    sqlCount = 0;
            int    pageInt  = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "cn6")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between the first "(" and the following ",".
                    string temp = pageNode.AsString().Replace("(", "kdxx").GetRegexBegEnd("kdxx", ",");
                    int    parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch (Exception)
                {
                    // Best effort: keep the single-page default when the pager cannot be read.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.gdzbtb.gov.cn/pbbgbd/pingbiaobaogao_" + (i - 1).ToString() + ".htm", Encoding.Default);
                    }
                    catch (Exception) { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "position2")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    string bCity = string.Empty, bPrjno = string.Empty,
                           bPrjname = string.Empty, bBidresultendtime = string.Empty,
                           bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty, bBidmethod = string.Empty, bRemark = string.Empty, bInfourl = string.Empty;

                    bPrjname = nodeList[j].GetATagValue("title");
                    if (bPrjname.Contains("广东省"))
                    {
                        bCity    = "广州市区";
                        bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "").Replace("广东省", "");
                    }
                    else
                    {
                        // The bracketed prefix of the title is used as the city name.
                        string temp = bPrjname.Replace("[", "kdxx").Replace("]", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                        bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "");
                        // string.Replace throws on an empty search value, so only strip a prefix that was actually found.
                        if (!string.IsNullOrEmpty(temp))
                        {
                            bPrjname = bPrjname.Replace(temp, "");
                        }
                        bCity = temp + "区";
                    }
                    bInfourl = "http://www.gdzbtb.gov.cn/pbbgbd/" + nodeList[j].GetATagHref().Replace("../", "").Replace("./", "");

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(bInfourl, Encoding.Default);
                    }
                    catch (Exception) { continue; }
                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    bBiddate = dtlNode.AsHtml().GetDateRegex();
                    if (string.IsNullOrEmpty(bBiddate))
                    {
                        bBiddate = DateTime.Now.ToString("yyyy-MM-dd");
                    }

                    // The attachment anchor sits between the script marker and the first "</a>" that
                    // follows it. Searching from the marker avoids a negative Substring length when
                    // an earlier anchor closes before the marker.
                    string attachUrl   = string.Empty;
                    string attachName  = string.Empty;
                    int    markerIndex = htldtl.IndexOf("$(\"#pbbg_shongti\")", StringComparison.Ordinal);
                    int    anchorEnd   = markerIndex < 0 ? -1 : htldtl.IndexOf("</a>", markerIndex, StringComparison.Ordinal);
                    if (markerIndex >= 0 && anchorEnd > markerIndex)
                    {
                        string anchorHtml = htldtl.Substring(markerIndex, anchorEnd - markerIndex) + "</a>";
                        parser = new Parser(new Lexer(anchorHtml));
                        NodeList atagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (atagNode != null && atagNode.Count > 0)
                        {
                            ATag aTag = atagNode.GetATag();
                            attachUrl  = aTag.Link;
                            attachName = aTag.LinkText;
                        }
                    }
                    if (string.IsNullOrEmpty(attachName))
                    {
                        attachName = bPrjname;
                    }

                    BidProject info = ToolDb.GenResultProject("广东省", bCity, "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(list);
                    }
                    sqlCount++;

                    // InfoUrl comes from scraped markup: double any single quote before embedding it in SQL.
                    string safeInfoUrl = (Convert.ToString(info.InfoUrl) ?? string.Empty).Replace("'", "''");
                    string sql         = string.Format("select Id from BidProject where 1=1 and InfoUrl='{0}'", safeInfoUrl);
                    string result      = Convert.ToString(ToolDb.ExecuteScalar(sql));

                    if (!string.IsNullOrEmpty(result))
                    {
                        // Already stored: update it and replace the attachment rows of the stored record.
                        if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate) && !string.IsNullOrEmpty(attachUrl))
                        {
                            SaveReportAttach(info, attachUrl, attachName, bBiddate, result);
                        }
                    }
                    else
                    {
                        if (ToolDb.SaveEntity(info, this.ExistCompareFields) && !string.IsNullOrEmpty(attachUrl))
                        {
                            SaveReportAttach(info, attachUrl, attachName, bBiddate, null);
                        }
                    }
                }
            }
            return(list);
        }

        /// <summary>
        /// Downloads the attachment of a detail page and saves it for <paramref name="info"/>.
        /// </summary>
        /// <param name="info">Project the attachment is created for; its <c>Id</c> is passed to the download helpers.</param>
        /// <param name="attachUrl">Link of the attachment anchor as found on the detail page.</param>
        /// <param name="attachName">Name passed to the download helpers for the attachment.</param>
        /// <param name="bidDate">Bid date text; its year and month select the folder in the download URL.</param>
        /// <param name="replacedSourceId">
        /// Id of an already stored project whose <c>BaseAttach</c> rows are deleted before the new
        /// attachment is saved, or <c>null</c>/empty when nothing has to be replaced.
        /// </param>
        private void SaveReportAttach(BidProject info, string attachUrl, string attachName, string bidDate, string replacedSourceId)
        {
            // The download URL contains a yyyyMM folder; fall back to the current month when the date is unreadable.
            DateTime parsedDate;
            string   monthFolder = DateTime.TryParse(bidDate, out parsedDate)
                ? parsedDate.ToString("yyyyMM")
                : DateTime.Now.ToString("yyyyMM");
            string alink = "http://www.gdzbtb.gov.cn/pbbgbd/" + monthFolder + "/" + attachUrl.Replace("../", "").Replace("./", "");

            BaseAttach attach = null;
            try
            {
                attach = ToolHtml.GetBaseAttach(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                if (attach == null)
                {
                    attach = ToolHtml.GetBaseAttachByUrl(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                }
            }
            catch (Exception)
            {
                // Best effort: a failed download must not abort the crawl.
            }
            if (attach == null)
            {
                return;
            }

            if (!string.IsNullOrEmpty(replacedSourceId))
            {
                // NOTE(review): old rows are deleted by the stored Id while the new attachment is built
                // with info.Id - confirm that SaveEntity keeps both ids in agreement on update.
                string sqlDelete = string.Format("delete from BaseAttach where SourceId='{0}'", replacedSourceId.Replace("'", "''"));
                ToolDb.ExecuteSql(sqlDelete);
            }
            ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
        }
Пример #24
0
        /// <summary>
        /// Reads the <c>&lt;xml&gt;</c> island embedded in the page at <c>SiteUrl</c>, loads every listed
        /// detail page from dongcheng.dg.gov.cn and, depending on the page title, turns it into a
        /// <c>BidInfo</c> (title contains "中标"), a <c>NoticeInfo</c> (title contains "通知") or an
        /// <c>InviteInfo</c> (everything else). Attachment links found in the detail content are
        /// added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops once the result list holds at least <c>MaxCount</c> entries.
        /// </param>
        /// <returns>The collected info entities; empty when the list page cannot be loaded or has no XML island.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new ArrayList();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch (Exception) { return(list); }
            if (string.IsNullOrEmpty(html))
            {
                return(list);
            }

            // Substring would throw on a missing or misordered marker, so bail out early instead.
            int startIndex = html.IndexOf("<xml", StringComparison.Ordinal);
            int endIndex   = startIndex < 0 ? -1 : html.IndexOf("</xml>", startIndex, StringComparison.Ordinal);
            if (endIndex < 0)
            {
                return(list);
            }

            // The custom XML tags are renamed to HTML tags so the HTML parser can filter on them.
            // "infourl" has to be replaced before "info" because the latter is a prefix of the former.
            string   xmlstr   = html.Substring(startIndex, endIndex - startIndex).ToLower().GetReplace("infourl", "span").GetReplace("info", "div").GetReplace("publishedtime", "p");
            Parser   parser   = new Parser(new Lexer(xmlstr));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("div"));
            if (pageNode == null || pageNode.Count == 0)
            {
                return(list);
            }

            for (int i = 0; i < pageNode.Count; i++)
            {
                parser = new Parser(new Lexer(pageNode[i].ToHtml()));
                NodeList dateNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("p"));
                parser.Reset();
                NodeList urlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("span"));
                if (dateNode == null || dateNode.Count == 0 || urlNode == null || urlNode.Count == 0)
                {
                    // Entry without a date or url element: skip it instead of failing the whole crawl.
                    continue;
                }
                string beginDate = dateNode[0].ToPlainTextString().GetDateRegex();
                string infoUrl   = "http://dongcheng.dg.gov.cn/publicfiles//business/htmlfiles/" + urlNode[0].ToPlainTextString();

                string htmldtl = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                }
                catch (Exception) { continue; }

                parser = new Parser(new Lexer(htmldtl));
                NodeList titleNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("title"));
                if (titleNode == null || titleNode.Count == 0)
                {
                    continue;
                }
                string prjName = titleNode[0].ToNodePlainString();
                if (prjName.Contains("_"))
                {
                    prjName = prjName.Remove(prjName.IndexOf("_"));
                }

                // All three info kinds read their content from the same top-aligned cells.
                parser.Reset();
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")));
                if (dtlNode != null && dtlNode.Count > 0)
                {
                    if (prjName.Contains("中标"))
                    {
                        string endDate = string.Empty, otherType = string.Empty;

                        string HtmlTxt = dtlNode.AsHtml();
                        string bidCtx  = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();

                        string buildUnit = bidCtx.GetBuildRegex();
                        string bidUnit   = bidCtx.GetBidRegex();
                        string bidMoney  = bidCtx.GetRegex("中标值").GetMoney();
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = bidCtx.GetMoneyRegex();
                        }
                        string prjMgr = bidCtx.GetMgrRegex();
                        string code   = bidCtx.GetCodeRegex();

                        string specType = "政府采购";
                        string bidType  = prjName.GetInviteBidType();
                        string msgType  = "东莞市东城区办事处";
                        BidInfo info = ToolDb.GenBidInfo("广东省", "东莞市区", "东城区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, HtmlTxt);
                        list.Add(info);

                        foreach (ATag a in FindAttachAnchors(HtmlTxt))
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(a.LinkText, info.Id, ToDongchengUrl(a.Link)));
                        }
                    }
                    else if (prjName.Contains("通知"))
                    {
                        string InfoType = string.Empty, prjCode = string.Empty, buildUnit = string.Empty;

                        string InfoTitle   = prjName;
                        string PublistTime = beginDate;
                        string htmlTxt     = dtlNode.AsHtml();
                        string src         = RewriteFirstImageUrl(ref htmlTxt);
                        string InfoCtx     = htmlTxt.ToCtxString();

                        NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "东莞市区", "东城区", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "东莞市东城区办事处", infoUrl, prjCode, buildUnit, string.Empty, string.Empty, "政府采购", string.Empty, htmlTxt);
                        list.Add(info);

                        // NOTE(review): a notice is checked against the InviteInfo table and stored under
                        // the InviteAttach folder - looks copied from the invitation branch, confirm intent.
                        if (!string.IsNullOrEmpty(src) && !InviteInfoExists(Convert.ToString(info.InfoUrl)))
                        {
                            try
                            {
                                BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                if (attach != null)
                                {
                                    ToolDb.SaveEntity(attach, "");
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: a failed image download must not abort the crawl.
                            }
                        }

                        foreach (ATag a in FindAttachAnchors(htmlTxt))
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(a.LinkText, info.Id, ToDongchengUrl(a.Link)));
                        }
                    }
                    else
                    {
                        string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                        string HtmlTxt   = dtlNode[0].ToHtml();
                        string src       = RewriteFirstImageUrl(ref HtmlTxt);
                        string inviteCtx = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();

                        string buildUnit  = inviteCtx.GetBuildRegex();
                        string prjAddress = inviteCtx.GetAddressRegex();
                        string code       = inviteCtx.GetCodeRegex();

                        string specType   = "政府采购";
                        string inviteType = prjName.GetInviteBidType();
                        string msgType    = "东莞市东城区办事处";

                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "东城区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, HtmlTxt);
                        list.Add(info);

                        // The embedded image is only downloaded for invitations that are not stored yet.
                        if (!string.IsNullOrEmpty(src) && !InviteInfoExists(Convert.ToString(info.InfoUrl)))
                        {
                            try
                            {
                                BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                if (attach != null)
                                {
                                    ToolDb.SaveEntity(attach, "");
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: a failed image download must not abort the crawl.
                            }
                        }

                        foreach (ATag a in FindAttachAnchors(HtmlTxt))
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(a.LinkText, info.Id, ToDongchengUrl(a.Link)));
                        }
                    }
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return(list);
                }
            }
            return(list);
        }

        /// <summary>
        /// Collects the anchors of <paramref name="html"/> for which <c>IsAtagAttach()</c> is true.
        /// </summary>
        /// <param name="html">Detail content to scan for anchors.</param>
        /// <returns>The matching <c>ATag</c> instances in document order; empty when there are none.</returns>
        private static IList FindAttachAnchors(string html)
        {
            IList    anchors      = new ArrayList();
            Parser   anchorParser = new Parser(new Lexer(html));
            NodeList aNode        = anchorParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode == null)
            {
                return(anchors);
            }
            for (int k = 0; k < aNode.Count; k++)
            {
                ATag a = aNode[k].GetATag();
                if (a.IsAtagAttach())
                {
                    anchors.Add(a);
                }
            }
            return(anchors);
        }

        /// <summary>
        /// Returns <paramref name="link"/> unchanged when it contains "http" (case-insensitive),
        /// otherwise prefixes it with the dongcheng.dg.gov.cn site root.
        /// </summary>
        private static string ToDongchengUrl(string link)
        {
            return(link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0
                ? link
                : "http://dongcheng.dg.gov.cn/" + link);
        }

        /// <summary>
        /// Finds the first <c>img</c> tag in <paramref name="html"/> and rewrites its <c>src</c> to an
        /// address below the dongcheng.dg.gov.cn site root.
        /// </summary>
        /// <param name="html">
        /// Detail content. When an image with a non-empty <c>src</c> is found, the content is replaced by
        /// its lower-cased form with the image address rewritten; otherwise it is left untouched.
        /// </param>
        /// <returns>The rewritten image address, or an empty string when no usable image was found.</returns>
        private static string RewriteFirstImageUrl(ref string html)
        {
            // The markup is lower-cased before parsing, so the extracted src is lower-case too.
            Parser   imgParser = new Parser(new Lexer(html.ToLower()));
            NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
            if (imgNode == null || imgNode.Count == 0)
            {
                return(string.Empty);
            }

            ImageTag imgTag = imgNode[0] as ImageTag;
            string   imgUrl = imgTag == null ? null : imgTag.GetAttribute("src");
            if (string.IsNullOrEmpty(imgUrl))
            {
                return(string.Empty);
            }

            string src = "http://dongcheng.dg.gov.cn/" + imgUrl;
            // Replacement runs on the lower-cased markup because that is the form imgUrl was read from.
            html = html.ToLower().GetReplace(imgUrl, src);
            return(src);
        }

        /// <summary>
        /// Checks whether the <c>InviteInfo</c> table already has a row with the given <c>InfoUrl</c>.
        /// </summary>
        /// <param name="infoUrl">Url to look up; single quotes are doubled before it is embedded in the query.</param>
        /// <returns><c>true</c> when the query yields a non-empty Id.</returns>
        private bool InviteInfoExists(string infoUrl)
        {
            string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", (infoUrl ?? string.Empty).Replace("'", "''"));
            object obj = ToolDb.ExecuteScalar(sql);
            return(obj != null && obj.ToString() != "");
        }
Пример #25
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list     = new ArrayList();
            int   sqlCount = 0;
            //取得页码
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));

            if (sNode != null && sNode.Count > 0)
            {
                string pageString = sNode.AsString();
                Regex  regexPage  = new Regex(@",共[^页]+页,");
                Match  pageMatch  = regexPage.Match(pageString);
                try { pageInt = int.Parse(pageMatch.Value.Replace(",共", "").Replace("页,", "").Trim()); }
                catch (Exception) { }
            }
            string cookiestr = string.Empty;

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "ctl00$hdnPageCount" }, new string[] { "ctl00$Content$GridView1", "Page$" + i.ToString(), viewState, "", eventValidation, pageInt.ToString() });
                    html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    TableTag table = nodeList[0] as TableTag;
                    for (int j = 1; j < table.RowCount - 1; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty,
                               inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty,
                               endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j] as TableRow;
                        code      = tr.Columns[1].ToPlainTextString().Trim();
                        prjName   = tr.Columns[2].ToPlainTextString().Trim();
                        buildUnit = tr.Columns[3].ToPlainTextString().Trim();
                        beginDate = tr.Columns[5].ToPlainTextString().Trim();
                        endDate   = tr.Columns[6].ToPlainTextString().Trim();
                        ATag aTag = tr.Columns[2].Children[0] as ATag;
                        InfoUrl = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = ToolHtml.GetHtmlByUrlEncode(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "").Trim();
                            Parser   dtlparserHTML = new Parser(new Lexer(htmldetail));
                            NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
                            HtmlTxt    = dtnodeHTML.AsHtml();
                            htmldetail = ToolHtml.GetHtmlByUrlEncode(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "").Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                        }
                        catch (Exception ex) { continue; }
                        Parser   dtlparser = new Parser(new Lexer(htmldetail));
                        NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));

                        inviteCtx = dtnode.AsString().Replace(" ", "");
                        Regex regPrjAdd = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
                        prjAddress = regPrjAdd.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
                        msgType    = "深圳市建设工程交易中心";
                        specType   = "建设工程";
                        Regex  regInvType = new Regex(@"[^\r\n]+[\r\n]{1}");
                        string InvType    = regInvType.Match(inviteCtx).Value;

                        inviteType = ToolHtml.GetInviteTypes(InvType);
                        #region 2013-11-19修改
                        Dictionary <string, Regex> dicRegex = new Dictionary <string, Regex>();
                        dicRegex.Add("重要提示", new Regex(@"([.\S\s]*)(?=重要提示)"));
                        dicRegex.Add("温馨提示", new Regex(@"([.\S\s]*)(?=温馨提示)"));
                        foreach (string dicValue in dicRegex.Keys)
                        {
                            if (inviteCtx.Contains(dicValue))
                            {
                                inviteCtx = dicRegex[dicValue].Match(inviteCtx).Value;
                            }
                        }
                        #endregion
                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳市工程", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, InfoUrl, HtmlTxt);
                        if (!crawlAll && sqlCount >= this.MaxCount)
                        {
                            return(null);
                        }
                        sqlCount++;
                        if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                        {
                            dtlparser.Reset();
                            NodeList dlNodes = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));//
                            if (dlNodes != null && dlNodes.Count > 0)
                            {
                                for (int f = 0; f < dlNodes.Count; f++)
                                {
                                    ATag fileTag = dlNodes[f] as ATag;
                                    if (fileTag.IsAtagAttach())
                                    {
                                        //BaseAttach attach = ToolDb.GenBaseAttach(fileTag.StringText, info.Id, fileTag.Link.Replace("..", "http://www.szjsjy.com.cn"));
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(fileTag.Link.Replace("..", "http://www.szjsjy.com.cn"), fileTag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                            }
                                        }
                                        catch { }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Пример #26
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, builds a <c>NotifyInfo</c> for every list row
        /// and saves it, together with the images and attachment links found in its detail page,
        /// through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> entries have been processed.
        /// </param>
        /// <returns>
        /// Always <c>null</c>; the entities are saved directly instead of being returned to the caller.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Get the page count (defaults to a single page); sqlCount counts the entries processed so far.
            int    pageInt = 1, sqlCount = 0;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // The first list page could not be fetched, so there is nothing to crawl.
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "PageListControl1$ctl06")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The value of the last <option> of the page selector is taken as the total page count.
                    SelectTag tag  = pageNode[0] as SelectTag;
                    string    temp = tag.OptionTags[tag.OptionTags.Length - 1].Value;
                    pageInt = int.Parse(temp);
                }
                catch { }   // keep the default of one page when the selector cannot be read
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the form state of the previously loaded page
                    // together with the "next page" button value.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTVALIDATION",
                        "PageListControl1$ctl03",
                        "PageListControl1$ctl06",
                        "select2"
                    }, new string[] {
                        viewState,
                        eventValidation,
                        "下一页",
                        (i - 1).ToString(),
                        "** 站点链接 **"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "Listbody")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    // NOTE(review): the last row is deliberately not visited (presumably a footer/pager row) - confirm.
                    for (int j = 0; j < table.RowCount - 1; j++)
                    {
                        string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;
                        msgType  = "上海市建筑业管理办公室";
                        infoType = "通知公告";
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[0].GetATag();
                        if (aTag == null)
                        {
                            // A row without a link cannot be crawled; skip it instead of failing the whole run.
                            continue;
                        }
                        headName    = aTag.LinkText.GetReplace("·, ");
                        releaseTime = tr.Columns[1].ToPlainTextString().GetDateRegex();

                        // The relative detail path is taken from between the quotes of the onclick attribute.
                        infoUrl = "http://www.ciac.sh.cn/newsdata/" + aTag.GetAttribute("onclick").GetRegexBegEnd("'", "'");
                        if (infoUrl.IsAtagAttach())
                        {
                            // The list entry links to a file: save the notice without content and
                            // store the file itself as its attachment.
                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "上海市", "上海市区", string.Empty, infoCtx, infoType);
                            sqlCount++;
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                BaseAttach entity = null;
                                try
                                {
                                    entity = ToolHtml.GetBaseAttach(infoUrl, headName, info.Id);
                                    if (entity != null)
                                    {
                                        ToolDb.SaveEntity(entity, string.Empty);
                                    }
                                }
                                catch { }   // attachment failures are swallowed so the crawl continues
                            }
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                            continue;
                        }
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "771")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            ctxHtml = dtlNode.AsHtml();
                            if (headName.Contains("..."))
                            {
                                // The list title contains "...": prefer the text of the first <p class="bb"> of the detail page.
                                parser = new Parser(new Lexer(ctxHtml));
                                NodeList pNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("p"), new HasAttributeFilter("class", "bb")));
                                if (pNode != null && pNode.Count > 0)
                                {
                                    string temp = pNode[0].ToNodePlainString();
                                    headName = string.IsNullOrEmpty(temp) ? headName : temp;
                                }
                            }
                            infoCtx = ctxHtml.ToCtxString();
                            List <string> listImg = new List <string>();
                            parser = new Parser(new Lexer(ctxHtml));
                            NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                            if (imgNode != null && imgNode.Count > 0)
                            {
                                for (int m = 0; m < imgNode.Count; m++)
                                {
                                    // Prefix every image URL with the site base, both in the stored HTML and in the download list.
                                    string imageUrl = (imgNode[m] as ImageTag).ImageURL;
                                    string link     = "http://www.ciac.sh.cn/newsdata/" + imageUrl;
                                    listImg.Add(link);
                                    ctxHtml = ctxHtml.GetReplace(imageUrl, link);
                                }
                            }

                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "上海市", "上海市区", string.Empty, infoCtx, infoType);
                            sqlCount++;
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                if (listImg.Count > 0)
                                {
                                    for (int a = 0; a < listImg.Count; a++)
                                    {
                                        BaseAttach entity = null;
                                        try
                                        {
                                            // Download the a-th image; indexing with 0 here would fetch the
                                            // first image repeatedly and never the remaining ones.
                                            entity = ToolHtml.GetBaseAttach(listImg[a], headName, info.Id);
                                            if (entity != null)
                                            {
                                                ToolDb.SaveEntity(entity, string.Empty);
                                            }
                                        }
                                        catch { }   // attachment failures are swallowed so the crawl continues
                                    }
                                }
                                parser = new Parser(new Lexer(ctxHtml));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            // Links that already contain "http" are used as-is; others get the site base prefix.
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.ciac.sh.cn/newsdata/" + a.Link;
                                            }
                                            BaseAttach entity = null;
                                            try
                                            {
                                                entity = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id);
                                                if (entity != null)
                                                {
                                                    ToolDb.SaveEntity(entity, string.Empty);
                                                }
                                            }
                                            catch { }   // attachment failures are swallowed so the crawl continues
                                        }
                                    }
                                }
                            }
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                        }
                    }
                }
            }
            return(null);
        }
Пример #27
0
        /// <summary>
        /// Crawls the paged list at <c>SiteUrl</c> and turns every entry into either a <c>BidInfo</c>
        /// (when the title contains "中标", "成交" or "结果") or an <c>InviteInfo</c> (all other titles).
        /// The first image of a detail page is downloaded and saved as an attachment when no row with
        /// the same <c>InfoUrl</c> exists yet; attachment links are queued on <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as the result list holds <c>MaxCount</c> entries.
        /// </param>
        /// <returns>The collected <c>BidInfo</c> / <c>InviteInfo</c> entities (empty when the first page cannot be fetched).</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "0h120")), true), new TagNameFilter("a")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The second-to-last pager link carries the page count, wrapped in "[" and "]".
                    string temp = sNode[sNode.Count - 2].ToNodePlainString();
                    pageInt = Convert.ToInt32(temp.GetReplace("[,]"));
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "0h120")));
                if (viewList != null && viewList.Count > 0)
                {
                    for (int j = 0; j < viewList.Count; j++)
                    {
                        TableTag table = viewList[j] as TableTag;
                        string   prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;
                        ATag     aTag = viewList[j].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        // Fall back to the link text when the anchor has no title attribute, so a single
                        // malformed entry does not abort the whole crawl with a NullReferenceException.
                        string title = aTag.GetAttribute("title");
                        prjName   = (title ?? aTag.LinkText ?? string.Empty).Trim().GetReplace(" ");
                        beginDate = table.ToNodePlainString().GetDateRegex();
                        InfoUrl   = "http://huangbu.huidong.gov.cn/" + aTag.Link;
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "fontzoom")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt = dtl.AsHtml();

                            if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果"))
                            {
                                // ---- bid result ----
                                string buildUnit = string.Empty, bidUnit = string.Empty,
                                       bidMoney = string.Empty, code = string.Empty,
                                       endDate = string.Empty, bidType = string.Empty,
                                       specType = string.Empty,
                                       msgType = string.Empty, bidCtx = string.Empty,
                                       prjMgr = string.Empty, otherType = string.Empty;
                                bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                                // A project name found in the body text overrides the list title.
                                string tempName = bidCtx.GetRegex("工程名称,项目名称");
                                if (!string.IsNullOrEmpty(tempName))
                                {
                                    prjName = tempName;
                                }
                                code      = bidCtx.GetCodeRegex().GetCodeDel();
                                buildUnit = TrimBuildUnit(bidCtx.GetBuildRegex());

                                bidUnit = bidCtx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.GetRegex("中标候选公司,中标候选人");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                {
                                    string ctx = bidCtx.GetReplace("元\r\n,元;\r\n", "元kdxx").GetRegexBegEnd("中标价", "kdxx");
                                    bidMoney = ctx.GetMoney("万元");
                                }
                                // Amounts above 100000 are divided by 10000
                                // (presumably yuan -> ten-thousand yuan; confirm against GetMoney).
                                decimal money;
                                if (decimal.TryParse(bidMoney, out money) && money > 100000)
                                {
                                    bidMoney = (money / 10000).ToString();
                                }
                                Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                                NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                string   src       = string.Empty;
                                if (imgNode != null && imgNode.Count > 0)
                                {
                                    // Only the first image is handled; its URL gets the site base prefix in the stored HTML.
                                    string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                    src     = "http://huangbu.huidong.gov.cn/" + imgUrl;
                                    HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                                }
                                msgType  = "惠东县黄埠镇人民政府";
                                specType = "政府采购";
                                bidType  = prjName.GetInviteBidType();
                                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                if (!string.IsNullOrEmpty(src))
                                {
                                    // InfoUrl comes from scraped markup: double any single quote so it cannot
                                    // terminate the SQL string literal.
                                    string sql = string.Format("select Id from BidInfo where InfoUrl='{0}'", (info.InfoUrl ?? string.Empty).Replace("'", "''"));
                                    object obj = ToolDb.ExecuteScalar(sql);
                                    if (obj == null || obj.ToString() == "")
                                    {
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "");
                                            }
                                        }
                                        catch { }   // image download failures are swallowed so the crawl continues
                                    }
                                }
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            // Links that already contain "http" are used as-is; others get the site base prefix.
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://huangbu.huidong.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                            else
                            {
                                // ---- invitation / tender notice ----
                                string code = string.Empty, buildUnit = string.Empty,
                                       prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                       specType = string.Empty, endDate = string.Empty,
                                       remark = string.Empty,
                                       msgType = string.Empty, otherType = string.Empty;

                                inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                                // A project name found in the body text overrides the list title.
                                string tempName = inviteCtx.GetRegex("工程名称,项目名称");
                                if (!string.IsNullOrEmpty(tempName))
                                {
                                    prjName = tempName;
                                }
                                inviteType = prjName.GetInviteBidType();

                                code       = inviteCtx.GetCodeRegex().GetCodeDel();
                                buildUnit  = TrimBuildUnit(inviteCtx.GetBuildRegex());
                                prjAddress = inviteCtx.GetAddressRegex();

                                Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                                NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                string   src       = string.Empty;
                                if (imgNode != null && imgNode.Count > 0)
                                {
                                    // Only the first image is handled; its URL gets the site base prefix in the stored HTML.
                                    string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                    src     = "http://huangbu.huidong.gov.cn/" + imgUrl;
                                    HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                                }
                                msgType = "惠东县黄埠镇人民政府";

                                specType = "政府采购";

                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!string.IsNullOrEmpty(src))
                                {
                                    // InfoUrl comes from scraped markup: double any single quote so it cannot
                                    // terminate the SQL string literal.
                                    string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", (info.InfoUrl ?? string.Empty).Replace("'", "''"));
                                    object obj = ToolDb.ExecuteScalar(sql);
                                    if (obj == null || obj.ToString() == "")
                                    {
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "");
                                            }
                                        }
                                        catch { }   // image download failures are swallowed so the crawl continues
                                    }
                                }
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            // Links that already contain "http" are used as-is; others get the site base prefix.
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://huangbu.huidong.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }

        /// <summary>
        /// Cuts a build-unit string at the first "招标代理" marker and, when it contains "公司",
        /// truncates it right after the first "公司".
        /// </summary>
        /// <param name="buildUnit">Raw build-unit text extracted from the detail page.</param>
        /// <returns>The shortened build-unit text.</returns>
        private static string TrimBuildUnit(string buildUnit)
        {
            if (buildUnit.Contains("招标代理"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
            }
            if (buildUnit.Contains("公司"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
            }
            return buildUnit;
        }
Пример #28
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List <ProjectResult>();
            int    sqlCount = 0;
            string html     = string.Empty;
            List <Dictionary <string, object> > dicFile = new List <Dictionary <string, object> >();

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (MaxCount + 20));
            }
            catch { return(null); }
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }
                object[] array = (object[])obj.Value;

                foreach (object arrValue in array)
                {
                    string Code = string.Empty, prjName = string.Empty, BuildUnit = string.Empty, FinalistsWay = string.Empty, RevStaMethod = string.Empty, SetStaMethod = string.Empty, VoteMethod = string.Empty, RevStaDate = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty, ProjectCtx = string.Empty, HtmlTxt = string.Empty, beginDate = string.Empty, attachFileGroupGuid = string.Empty, dbJieGuoGuid = string.Empty, ggGuid = string.Empty, bdGuid = string.Empty, gcLeiXing = string.Empty, zbrName = string.Empty, zhongBiaoJia = string.Empty, jsonHtml = string.Empty;
                    Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                    Code    = Convert.ToString(dic["bdBH"]);
                    prjName = Convert.ToString(dic["bdName"]);
                    //if (!prjName.Contains("茅洲河(光明新区)水环境综合整治工程项目(水景观")) continue;

                    beginDate    = Convert.ToString(dic["createTime2"]);
                    InfoUrl      = Convert.ToString(dic["detailUrl"]);
                    dbJieGuoGuid = Convert.ToString(dic["dbJieGuoGuid"]);
                    ggGuid       = Convert.ToString(dic["ggGuid"]);
                    bdGuid       = Convert.ToString(dic["bdGuid"]);
                    gcLeiXing    = Convert.ToString(dic["gcLeiXing"]);
                    //zbrName = Convert.ToString(dic["zbrName"]);
                    zhongBiaoJia = Convert.ToString(dic["zhongBiaoJia"]).GetMoney();

                    string crawlUrl = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=9&id=" + Code;
                    try
                    {
                        jsonHtml = this.ToolWebSite.GetHtmlByUrl(crawlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    }
                    catch { }
                    if (!jsonHtml.Contains("<div") || string.IsNullOrEmpty(jsonHtml))
                    {
                        try
                        {
                            if (string.IsNullOrEmpty(jsonHtml))
                            {
                                crawlUrl = "https://www.szjsjy.com.cn:8001/jyw/queryDbJieGuoByGuid.do?guid=" + Convert.ToString(dic["dbJieGuoGuid"]);
                                jsonHtml = this.ToolWebSite.GetHtmlByUrl(crawlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"").GetReplace(":RMB:", ":");
                            }

                            string strHtml = PrjResultStr(jsonHtml);//.Replace("A,B,C","")
                            JavaScriptSerializer        serializerNew  = new JavaScriptSerializer();
                            Dictionary <string, object> smsTypeJsonNew = null;
                            try
                            {
                                smsTypeJsonNew = (Dictionary <string, object>)serializer.DeserializeObject(strHtml);
                            }
                            catch
                            {
                                try
                                {
                                    strHtml        = PrjResultStr(jsonHtml, true);
                                    smsTypeJsonNew = (Dictionary <string, object>)serializer.DeserializeObject(strHtml);
                                }
                                catch
                                {
                                    try
                                    {
                                        strHtml        = GetPrjResultDtl(strHtml);
                                        smsTypeJsonNew = (Dictionary <string, object>)serializer.DeserializeObject(strHtml);
                                    }
                                    catch { }
                                }
                            }
                            string ggBdGuid = string.Empty, dbBanFa = string.Empty, piaoJueBanFa = string.Empty, dbTime = string.Empty, isChouQian = string.Empty,
                                   chouQianRuWeiFangShi = string.Empty, rwFangShi = string.Empty, zbName = string.Empty, tongYongZhongBiaoJia = string.Empty,
                                   isDuiWaiGongShi = string.Empty, isYiYiTime = string.Empty, Lxr = string.Empty, LxDh = string.Empty, jsDw = string.Empty,
                                   ggMc = string.Empty, bdBh = string.Empty, ggShiXiangGuid = string.Empty,
                                   isHeSuan = string.Empty, gongQi = string.Empty, isTiJiaoDbwy = string.Empty, isXuYaoZuJianDbwyh = string.Empty;

                            Dictionary <string, object> bd = (Dictionary <string, object>)smsTypeJsonNew["bd"];
                            Dictionary <string, object> gc = (Dictionary <string, object>)bd["gc"];
                            try { ggShiXiangGuid = bd["ggShiXiangGuid"].ToString(); }
                            catch { }
                            try { ggMc = bd["bdName"].ToString(); }
                            catch { }
                            try { bdBh = bd["bdBH"].ToString(); }
                            catch { }
                            try { ggBdGuid = smsTypeJsonNew["ggBdGuid"].ToString(); }
                            catch { }
                            try { dbBanFa = smsTypeJsonNew["dbBanFa"].ToString(); }
                            catch
                            {
                                try { dbBanFa = bd["dbBanFa"].ToString(); }
                                catch { }
                            }
                            try { piaoJueBanFa = smsTypeJsonNew["piaoJueBanFa"].ToString(); }
                            catch { }
                            try { dbTime = smsTypeJsonNew["dbTime"].ToString(); dbTime = ToolHtml.GetDateTimeByLong(long.Parse(dbTime)).ToString(); }
                            catch { }
                            try { isChouQian = smsTypeJsonNew["isChouQian"].ToString(); }
                            catch { }
                            try { chouQianRuWeiFangShi = smsTypeJsonNew["chouQianRuWeiFangShi"].ToString(); }
                            catch { }
                            try { rwFangShi = smsTypeJsonNew["rwFangShi"].ToString(); }
                            catch { }
                            try { zbName = smsTypeJsonNew["zbName"].ToString(); }
                            catch { }
                            try { tongYongZhongBiaoJia = smsTypeJsonNew["tongYongZhongBiaoJia"].ToString(); }
                            catch { }
                            try { isDuiWaiGongShi = smsTypeJsonNew["isDuiWaiGongShi"].ToString(); }
                            catch { }
                            try { isYiYiTime = smsTypeJsonNew["isYiYiTime"].ToString(); }
                            catch { }
                            try { isHeSuan = smsTypeJsonNew["isHeSuan"].ToString(); }
                            catch { }
                            try { gongQi = smsTypeJsonNew["gongQi"].ToString(); }
                            catch { }
                            try { isTiJiaoDbwy = smsTypeJsonNew["isTiJiaoDbwy"].ToString(); }
                            catch { }
                            try { isXuYaoZuJianDbwyh = smsTypeJsonNew["isXuYaoZuJianDbwyh"].ToString(); }
                            catch { }
                            try { Lxr = gc["jingBanRenName"].ToString(); }
                            catch { try { Lxr = gc["lianXiRenName"].ToString(); } catch { } }
                            try { LxDh = gc["lianXiRenMobile"].ToString(); }
                            catch { try { LxDh = gc["lianXiRenPhone"].ToString(); } catch { } }
                            try { jsDw = gc["zbRName"].ToString(); }
                            catch { }
                            try { attachFileGroupGuid = smsTypeJsonNew["attachFileGroupGuid"].ToString(); }
                            catch { }
                            if (dbBanFa.IsNumber())
                            {
                                dbBanFa = "无";
                            }
                            string dtlHtml = string.Empty;
                            string dtlUrl  = "https://www.szjsjy.com.cn:8001/jyw/jyw/dbResult_View.do?bdGuid=" + ggGuid;
                            try
                            {
                                dtlHtml = this.ToolWebSite.GetHtmlByUrl(dtlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                                if (string.IsNullOrEmpty(dtlHtml) || dtlHtml.Length < 10)
                                {
                                    dtlUrl  = "https://www.szjsjy.com.cn:8001/jyw/queryPmxtTbrListGs.do?dbGuid=" + ggGuid;
                                    dtlHtml = this.ToolWebSite.GetHtmlByUrl(dtlUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                                }
                            }
                            catch { Logger.Error(prjName); continue; }
                            if (!string.IsNullOrEmpty(dtlHtml) && dtlHtml.Length > 10)
                            {
                                HtmlTxt = dtlHtml;
                                Parser   parserNew = new Parser(new Lexer(HtmlTxt));
                                NodeList tableNode = parserNew.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "de_tab1")));
                                if (tableNode != null && tableNode.Count > 0)
                                {
                                    HtmlTxt = tableNode.AsHtml();
                                    HtmlTxt = HtmlTxt.GetReplace("<td  id=ggName>&nbsp;</td>", "<td  id=\"ggName\">&nbsp;" + ggMc + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=bdBH>&nbsp;</td>", "<td  id=\"bdBH\">&nbsp;" + bdBh + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=bdName>&nbsp;</td>", "<td  id=\"bdName\">&nbsp;" + ggMc + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=zbRName>&nbsp;</td>", "<td  id=\"zbRName\">&nbsp;" + jsDw + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=zbName>&nbsp;</td>", "<td  id=\"zbName\">&nbsp;" + zbName + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=dbTime>&nbsp;</td>", "<td  id=\"dbTime\">&nbsp;" + dbTime + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=rwfs>&nbsp;</td>", "<td  id=\"rwfs\">&nbsp;" + rwFangShi + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=dbBanFa>&nbsp;</td>", "<td  id=\"dbBanFa\">&nbsp;" + dbBanFa + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=lianXiRenName>&nbsp;</td>", "<td  id=\"lianXiRenName\">&nbsp;" + Lxr + "</td>");
                                    HtmlTxt = HtmlTxt.GetReplace("<td id=lianXiRenPhone>&nbsp;</td>", "<td  id=\"lianXiRenName\">&nbsp;" + LxDh + "</td>");

                                    string resultUrl  = "https://www.szjsjy.com.cn:8001/jyw/queryTbrListByBdGuidAndGgGuidForGs.do";
                                    string jsonResult = string.Empty;
                                    try
                                    {
                                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "bdGuid", "ggGuid" }, new string[] { bdGuid, ggShiXiangGuid });
                                        jsonResult = this.ToolWebSite.GetHtmlByUrl(resultUrl, nvc).GetJsString().GetReplace("\\t,\\r,\\n,\"");

                                        if (string.IsNullOrEmpty(jsonResult) || jsonResult.Length <= 10)
                                        {
                                            nvc        = this.ToolWebSite.GetNameValueCollection(new string[] { "bdGuid", "ggGuid" }, new string[] { bdGuid, ggGuid });
                                            jsonResult = this.ToolWebSite.GetHtmlByUrl(resultUrl, nvc).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                                        }
                                    }
                                    catch (Exception ex) { Logger.Error(prjName); }
                                    if (!string.IsNullOrEmpty(jsonResult) && jsonResult.Length >= 10)
                                    {
                                        string jiHua_LiXiang_BH = string.Empty, jiHua_LiXiang_BH2 = string.Empty;
                                        try
                                        {
                                            Dictionary <string, object> xm = (Dictionary <string, object>)bd["xm"];
                                            jiHua_LiXiang_BH  = xm["jiHua_LiXiang_BH"].ToString().GetReplace("【", "[").GetReplace("】", "]");
                                            jiHua_LiXiang_BH2 = xm["jiHua_LiXiang_BH"].ToString();
                                        }
                                        catch { }
                                        string tempJson = jsonResult;
                                        if (!string.IsNullOrEmpty(jiHua_LiXiang_BH))
                                        {
                                            tempJson = jsonResult.Replace(jiHua_LiXiang_BH, jiHua_LiXiang_BH2);
                                        }
                                        string dtlTbName = PrjResultStr(tempJson, true);
                                        JavaScriptSerializer serializerDtl = new JavaScriptSerializer();
                                        object[]             dtlObj        = null;
                                        try
                                        {
                                            dtlObj = (object[])serializerDtl.DeserializeObject(dtlTbName);
                                        }
                                        catch
                                        {
                                            try
                                            {
                                                dtlTbName  = dtlTbName.Substring(0, dtlTbName.Length - 2);
                                                dtlTbName += "}}]";
                                                dtlObj     = (object[])serializerDtl.DeserializeObject(dtlTbName);
                                            }
                                            catch
                                            {
                                                try
                                                {
                                                    dtlTbName = dtlTbName.Trim().Replace("},{", "}},{");
                                                    dtlObj    = (object[])serializerDtl.DeserializeObject(dtlTbName);
                                                }
                                                catch
                                                {
                                                    Logger.Error(prjName);
                                                }
                                            }
                                        }

                                        bool isOk = false;

                                        StringBuilder sb = new StringBuilder();
                                        if (dbBanFa.Contains("逐轮票决"))
                                        {
                                            StringBuilder strZlpj = new StringBuilder();

                                            List <PrjResult> prjResluts = LPrjResult.GetPrjZlResult(dtlObj);
                                            IEnumerable <IGrouping <int, PrjResult> > ienums = prjResluts.GroupBy(x => x.lunCiXuHao).OrderBy(x => x.Key);

                                            foreach (IGrouping <int, PrjResult> groups in ienums)
                                            {
                                                strZlpj.AppendFormat("<h3>第{0}大轮投票表</h3>", groups.Key);
                                                strZlpj.Append("<table width='100%' border='0' class='de_tab2'>");
                                                strZlpj.Append("<tr>");
                                                strZlpj.Append("<th style='text-align: left' class='bg_tdtop'>编号</th>");
                                                strZlpj.Append("<th style='text-align: left' class='bg_tdtop'>投标单位</th>");
                                                strZlpj.Append("<th style='text-align: left' class='bg_tdtop'>得票数</th>");
                                                strZlpj.Append("<th style='text-align: left' class='bg_tdtop'>排名</th>");
                                                strZlpj.Append("</tr>");

                                                List <PrjResult> results = groups.ToList().OrderBy(x => x.Bh).ToList();

                                                foreach (PrjResult prj in results)
                                                {
                                                    strZlpj.Append("<tr>");
                                                    strZlpj.Append("<th style='padding: 0px'>" + prj.Bh + "</th>");
                                                    strZlpj.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Name + "</th>");
                                                    strZlpj.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Mc + "</th>");
                                                    strZlpj.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Xh + "</th>");
                                                    strZlpj.Append("</tr>");
                                                    isOk = true;
                                                }

                                                strZlpj.Append("</table>");
                                            }
                                            sb.Append(strZlpj.ToString());
                                        }
                                        else
                                        {
                                            StringBuilder strTmp = new StringBuilder();
                                            strTmp.Append("<table width='100%' border='0' class='de_tab2'>");


                                            switch (dbBanFa)
                                            {
                                            case "直接票决":
                                                strTmp.Append("<tr>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>编号</th>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>投标单位</th>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>取胜次数</th>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>排名</th>");
                                                strTmp.Append("</tr>");
                                                List <PrjResult> PrjResults = LPrjResult.GetPrjResult(dtlObj);
                                                foreach (PrjResult prj in PrjResults)
                                                {
                                                    strTmp.Append("<tr>");
                                                    strTmp.Append("<th style='padding: 0px'>" + prj.Bh + "</th>");
                                                    strTmp.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Name + "</th>");
                                                    strTmp.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Mc + "</th>");
                                                    strTmp.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Xh + "</th>");
                                                    strTmp.Append("</tr>");
                                                    isOk = true;
                                                }
                                                break;

                                            default:
                                                strTmp.Append("<tr>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>序号</th>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>企业名称</th>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>投标时间</th>");
                                                strTmp.Append("<th style='text-align: left' class='bg_tdtop'>中标候选人</th>");
                                                strTmp.Append("</tr>");
                                                List <PrjResult> PrjResultBid = LPrjResult.GetPrjResultBid(dtlObj);
                                                foreach (PrjResult prj in PrjResultBid)
                                                {
                                                    strTmp.Append("<tr>");
                                                    strTmp.Append("<th style='padding: 0px'>" + prj.Xh + "</th>");
                                                    strTmp.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Name + "</th>");
                                                    strTmp.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.Date + "</th>");
                                                    strTmp.Append("<th style='padding: 0px' class='bg_tdtop'>" + prj.IsBid + "</th>");
                                                    strTmp.Append("</tr>");
                                                    isOk = true;
                                                }
                                                break;
                                            }
                                            strTmp.Append("</table>");

                                            sb.Append(strTmp.ToString());
                                        }


                                        if (isOk)
                                        {
                                            HtmlTxt += sb;
                                        }
                                    }
                                }
                            }
                        }
                        catch { }
                        if (!string.IsNullOrEmpty(attachFileGroupGuid))
                        {
                            bool          FileOk = false;
                            StringBuilder sb     = new StringBuilder();
                            try
                            {
                                sb.Append("<table id=\"wenJian_List\" width=\"100%\" border=\"0\" class=\"de_tab2\">");
                                sb.Append("<tr>");
                                sb.Append("<td class=\"bg_tdtop\">序号</td>");
                                sb.Append("<td class=\"bg_tdtop\" >文件名</td>");
                                sb.Append("<td class=\"bg_tdtop\">创建时间</td>");
                                sb.Append("</tr>");
                                string url        = "https://www.szjsjy.com.cn:8001/jyw/filegroup/queryByGroupGuidZS.do?groupGuid=" + attachFileGroupGuid;
                                string attachHtml = this.ToolWebSite.GetHtmlByUrl(url);
                                JavaScriptSerializer        newSerializer = new JavaScriptSerializer();
                                Dictionary <string, object> newTypeJson = (Dictionary <string, object>)newSerializer.DeserializeObject(attachHtml);
                                string attachGuid = string.Empty, attachName = string.Empty, createTime = string.Empty;
                                foreach (KeyValuePair <string, object> newObj in newTypeJson)
                                {
                                    object[] newArray = (object[])newObj.Value;
                                    int      row      = 1;
                                    foreach (object newArr in newArray)
                                    {
                                        Dictionary <string, object> newDic = (Dictionary <string, object>)newArr;
                                        try
                                        {
                                            dicFile.Add(newDic);
                                            attachGuid = Convert.ToString(newDic["attachGuid"]);
                                            attachName = Convert.ToString(newDic["attachName"]);
                                            createTime = Convert.ToString(newDic["createTime"]);
                                            if (!string.IsNullOrEmpty(createTime))
                                            {
                                                createTime = ToolHtml.GetDateTimeByLong(long.Parse(createTime)).ToString();
                                            }
                                            string newUrl = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachGuid;
                                            string aTag   = "<a href='" + newUrl + "'  target='_blank'>" + attachName + "</a>";
                                            sb.Append("<tr>");
                                            sb.Append("<td>" + row + "</td>");
                                            sb.Append("<td>" + aTag + "</td>");
                                            sb.Append("<td>" + createTime + "</td>");
                                            sb.Append("</tr>");
                                            row++;
                                            FileOk = true;
                                        }
                                        catch { }
                                    }
                                }
                                sb.Append("</table>");
                            }
                            catch { }
                            if (FileOk)
                            {
                                HtmlTxt += sb.ToString();
                            }
                        }
                    }
                    else
                    {
                        HtmlTxt = jsonHtml;
                        Parser   parserA = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode   = parserA.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int i = 0; i < aNode.Count; i++)
                            {
                                ATag aTag = aNode[i] as ATag;
                                if (aTag.IsAtagAttach())
                                {
                                    Dictionary <string, object> fileDic = new Dictionary <string, object>();
                                    fileDic.Add("attachGuid", aTag.Link.GetReplace("\\"));
                                    fileDic.Add("attachName", aTag.LinkText.ToNodeString());
                                    dicFile.Add(fileDic);
                                }
                            }
                        }
                    }
                    ProjectCtx = HtmlTxt.GetReplace("<br />,<br/>,</ br>,</br>", "\r\n").ToCtxString() + "\r\n";
                    Parser   parser  = new Parser(new Lexer(HtmlTxt.GetReplace("th", "td")));
                    NodeList ctxNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (ctxNode != null && ctxNode.Count > 0)
                    {
                        string   dtlCtx   = string.Empty;
                        TableTag ctxTable = ctxNode[0] as TableTag;
                        for (int d = 0; d < ctxTable.RowCount; d++)
                        {
                            for (int k = 0; k < ctxTable.Rows[d].ColumnCount; k++)
                            {
                                if ((k + 1) % 2 == 0)
                                {
                                    dtlCtx += ctxTable.Rows[d].Columns[k].ToNodePlainString() + "\r\n";
                                }
                                else
                                {
                                    dtlCtx += ctxTable.Rows[d].Columns[k].ToNodePlainString() + ":";
                                }
                            }
                        }
                        BuildUnit    = dtlCtx.GetRegex("建设单位");
                        FinalistsWay = dtlCtx.GetRegex("入围方式");
                        RevStaMethod = dtlCtx.GetRegex("评标方法");
                        SetStaMethod = dtlCtx.GetRegex("定标方法");
                        VoteMethod   = dtlCtx.GetRegex("票决方法");
                        RevStaDate   = dtlCtx.GetRegex("定标时间").GetDateRegex("yyyy/MM/dd");

                        if (!SetStaMethod.IsChina())
                        {
                            SetStaMethod = "";
                        }
                    }
                    MsgType = "深圳市建设工程交易中心";

                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(list);
                    }

                    ProjectResult info = ToolDb.GetProjectResult("广东省", "深圳市工程", "", Code, prjName, BuildUnit, FinalistsWay, RevStaMethod, SetStaMethod, VoteMethod, RevStaDate, InfoUrl, MsgType, ProjectCtx, HtmlTxt, beginDate);


                    if (prjName.Contains("深圳广电金融中心施工总承包工程"))
                    {
                        string delSql = string.Format("delete from ProjectResult where InfoUrl='{0}'", info.InfoUrl);
                        ToolDb.ExecuteSql(delSql);
                    }

                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        if (this.ExistsUpdate)
                        {
                            object id = ToolDb.ExecuteScalar(string.Format("select Id from ProjectResult where InfoUrl='{0}'", info.InfoUrl));
                            if (id != null)
                            {
                                string sql = string.Format("delete from ProjectResultDtl where SourceId='{0}'", id);
                                ToolDb.ExecuteSql(sql);
                                string sqlAttach = string.Format("delete from BaseAttach where SourceId='{0}'", id);
                                ToolDb.ExecuteSql(sqlAttach);
                            }
                        }
                        if (dicFile.Count > 0)
                        {
                            try
                            {
                                foreach (Dictionary <string, object> newDic in dicFile)
                                {
                                    try
                                    {
                                        string attachGuid = Convert.ToString(newDic["attachGuid"]);
                                        string attachName = Convert.ToString(newDic["attachName"]);
                                        string newUrl     = string.Empty;
                                        if (attachGuid.ToLower().Contains("http"))
                                        {
                                            newUrl = attachGuid;
                                        }
                                        else
                                        {
                                            newUrl = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachGuid;
                                        }


                                        BaseAttach attach = ToolHtml.GetBaseAttach(newUrl, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                                        if (attach == null)
                                        {
                                            attach = ToolHtml.GetBaseAttach(newUrl, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                                        }

                                        if (attach != null)
                                        {
                                            ToolDb.SaveEntity(attach, string.Empty);
                                        }
                                    }
                                    catch
                                    {
                                        continue;
                                    }
                                }
                            }
                            catch { }
                        }

                        //if (ctxNode != null && ctxNode.Count > 1)
                        //{
                        //    TableTag prjTable = ctxNode[1] as TableTag;
                        //    string colName1 = prjTable.Rows[0].Columns[2].ToNodePlainString();
                        //    string colName2 = prjTable.Rows[0].Columns[3].ToNodePlainString();
                        //    for (int c = 2; c < prjTable.RowCount; c++)
                        //    {
                        //        TableRow dr = prjTable.Rows[c];

                        //        string UnitName = string.Empty, BidDate = string.Empty, IsBid = string.Empty, Ranking = string.Empty, WinNumber = string.Empty, TicketNumber = string.Empty;

                        //        UnitName = dr.Columns[1].ToNodePlainString();
                        //        if (colName1.Contains("投标时间") || colName1.Contains("投标日期"))
                        //            BidDate = dr.Columns[2].ToPlainTextString();
                        //        else if (colName1.Contains("得票数"))
                        //            TicketNumber = dr.Columns[2].ToNodePlainString();
                        //        else if (colName1.Contains("取胜次数"))
                        //            WinNumber = dr.Columns[2].ToNodePlainString();
                        //        if (colName2.Contains("排名"))
                        //            Ranking = dr.Columns[3].ToNodePlainString();
                        //        else if (colName2.Contains("中标候选人"))
                        //            IsBid = dr.Columns[3].ToNodePlainString() == "" ? "0" : "1";

                        //        ProjectResultDtl infoDtl = ToolDb.GetProjectResultDtl(info.Id, UnitName, BidDate, IsBid, Ranking, WinNumber, TicketNumber);
                        //        ToolDb.SaveEntity(infoDtl, "SourceId,UnitName", this.ExistsUpdate);
                        //    }
                        //}
                    }
                }
            }
            return(list);
        }
Пример #29
0
        /// <summary>
        /// Crawls the notice list at <c>SiteUrl</c> page by page. For every list row it
        /// downloads the detail page, builds a <c>NotifyInfo</c> from the <c>xx-main</c>
        /// div and saves it through <c>ToolDb.SaveEntity</c>; when the save succeeds, the
        /// links in the notice body that <c>IsAtagAttach</c> accepts are saved as attachments.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> notices have been
        /// processed; when <c>true</c>, every page is crawled.
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.gzggzy.cn";
            int    pageInt = 1, sqlCount = 0;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // The first list page could not be fetched, so there is nothing to crawl.
                return(null);
            }

            // Get the page count from the pager div.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination page-mar")));
            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/共", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Pager text could not be parsed; fall back to crawling the first page only.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch
                    {
                        // Best effort: skip a list page that fails to download.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "wsbs-table")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    // The matched node is not a table tag; nothing to iterate on this page.
                    continue;
                }

                // Row 0 is skipped (the loop has always started at index 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Title is read from cell 1 and date from cell 2; skip malformed rows
                        // instead of letting an index error abort the whole crawl.
                        continue;
                    }

                    string headName    = tr.Columns[1].ToNodePlainString();
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl     = siteRoot + tr.Columns[1].GetATagHref();
                    string infoScorce  = string.Empty;

                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default);
                    }
                    catch
                    {
                        // Best effort: skip a notice whose detail page fails to download.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "xx-main")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml  = dtlNode.AsHtml();
                    string infoCtx  = ctxHtml.ToCtxString();
                    string msgType  = "广州公共资源交易中心";
                    string infoType = "通知公告";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "广州市区", string.Empty, infoCtx, infoType);
                    sqlCount++;

                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        parser = new Parser(new Lexer(ctxHtml));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag aTag = aNode[k].GetATag();
                                if (!aTag.IsAtagAttach())
                                {
                                    continue;
                                }

                                // Links that do not already contain "http" are resolved against the site root.
                                string link = aTag.Link.ToLower().Contains("http")
                                    ? aTag.Link
                                    : siteRoot + aTag.Link;
                                try
                                {
                                    BaseAttach entity = ToolHtml.GetBaseAttach(link, aTag.LinkText, info.Id);
                                    if (entity != null)
                                    {
                                        ToolDb.SaveEntity(entity, string.Empty);
                                    }
                                }
                                catch
                                {
                                    // Best effort: a failed attachment must not abort the crawl.
                                }
                            }
                        }
                    }

                    // The cap applies to incremental crawls only. The original tested
                    // "crawlAll", which stopped full crawls and never limited incremental ones.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                }
            }
            return(null);
        }
Пример #30
0
        /// <summary>
        /// Crawls the service-guide table at <c>SiteUrl</c> page by page. Each row becomes a
        /// <c>NotifyInfo</c> saved through <c>ToolDb.SaveEntity</c>. When the save succeeds,
        /// the row's own link and the links in the last cell of every row covered by the
        /// title cell's <c>rowSpan</c> are saved as attachments if <c>IsAtagAttach</c> accepts them.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops once the running count reaches <c>MaxCount</c>;
        /// when <c>true</c>, every page is crawled.
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string baseUrl = "http://www.stjs.gov.cn/bsdt/";
            string html;
            int    pageInt = 1, sqlCount = 0;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch (Exception)
            {
                // The first list page could not be fetched, so there is nothing to crawl.
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "700")));
            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string pageText = pageList.AsString().GetRegexBegEnd("/", "下");
                    pageInt = Convert.ToInt32(pageText);
                }
                catch
                {
                    // Pager text could not be parsed; fall back to crawling the first page only.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // NOTE(review): unlike the first page, later pages are not passed
                        // through GetJsString() - confirm whether that is intended.
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?page=" + i.ToString(), Encoding.Default);
                    }
                    catch
                    {
                        // Best effort: skip a list page that fails to download.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "98%")));
                if (nodeList == null || nodeList.Count <= 1)
                {
                    continue;
                }

                // The second matching table is the one that is iterated.
                TableTag table = nodeList[1] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 2)
                    {
                        // Title and link are read from cell 1; skip rows that do not have it
                        // instead of letting an index error abort the whole crawl.
                        continue;
                    }

                    string rowSpanText = tr.Columns[1].GetAttribute("rowSpan");
                    string infoType    = "办事指南";
                    string releaseTime = DateTime.Now.ToString("yyyy-MM-dd");
                    string headName    = tr.Columns[1].ToNodePlainString();
                    string infoUrl     = baseUrl + tr.Columns[1].GetATagHref();
                    string msgType     = MsgTypeCosnt.ShanTouMsgType;
                    string infoScorce  = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty;

                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "汕头市区", string.Empty, infoCtx, infoType);
                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // The entry's own link may itself point at a downloadable file.
                    if (infoUrl.IsAtagAttach())
                    {
                        try
                        {
                            BaseAttach obj = ToolHtml.GetBaseAttach(infoUrl, headName, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: a failed attachment must not abort the crawl.
                        }
                    }

                    // A rowSpan on the title cell means the following rows belong to this
                    // entry. Non-numeric or non-positive values are ignored (previously they
                    // threw or re-processed the same row).
                    int attachCount;
                    if (string.IsNullOrEmpty(rowSpanText) || !int.TryParse(rowSpanText, out attachCount) || attachCount <= 0)
                    {
                        continue;
                    }

                    // Clamp to the table so an oversized rowSpan cannot index past the last row.
                    int lastRow = Math.Min(j + attachCount, table.RowCount) - 1;
                    for (int r = j; r <= lastRow; r++)
                    {
                        TableRow dr = table.Rows[r];
                        if (dr.ColumnCount == 0)
                        {
                            continue;
                        }

                        ATag fileUrl = dr.Columns[dr.ColumnCount - 1].GetATag();
                        if (fileUrl.IsAtagAttach())
                        {
                            try
                            {
                                BaseAttach obj = ToolHtml.GetBaseAttach(baseUrl + fileUrl.Link, fileUrl.LinkText, info.Id);
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort: a failed attachment must not abort the crawl.
                            }
                        }
                    }

                    // Resume the outer loop after the rows consumed by this entry.
                    j = lastRow;
                }
            }
            return(null);
        }