Beispiel #1
0
        /// <summary>
        /// Downloads the bulletin list for category 002001001 and builds one
        /// <see cref="InviteInfo"/> for every entry whose detail page contains a
        /// <c>div</c> with id <c>mainContent</c>. Attachment links found inside that
        /// node are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records
        /// have been collected; when <c>true</c>, every returned entry is processed.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty (never <c>null</c>) when the site
        /// page or the bulletin list cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string urlFormt = "http://www.nxzfcg.gov.cn/ningxia/services/BulletinWebServer/getBulletinInfoList?response=application/json&pageIndex=1&pageSize={0}&siteguid={1}&categorynum=002001001&cityname=";
            IList  list     = new List <InviteInfo>();
            string html     = string.Empty;

            // The landing page carries the "siteguid" input needed to query the list service.
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return list; }
            string siteId = ToolHtml.GetHtmlInputValueById(html, "siteguid");
            string url    = string.Format(urlFormt, this.MaxCount, siteId);

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(url);
            }
            // Return the empty list (not null) so both download failures look the same to callers.
            catch { return list; }

            // The service wraps the real payload as a JSON string under "return";
            // the payload's "Table" entry holds the announcement rows.
            // A malformed response still throws here, exactly as before.
            JavaScriptSerializer        serializer = new JavaScriptSerializer();
            Dictionary <string, object> envelope   = (Dictionary <string, object>)serializer.DeserializeObject(html);
            Dictionary <string, object> payload    = (Dictionary <string, object>)serializer.DeserializeObject(envelope["return"].ToString());
            object[] listDatas = (object[])payload["Table"];
            if (listDatas == null)
            {
                return list;
            }

            foreach (object data in listDatas)
            {
                Dictionary <string, object> dic = data as Dictionary <string, object>;
                if (dic == null)
                {
                    // Skip rows that are not JSON objects instead of failing on a null dereference.
                    continue;
                }

                // Fields the page does not provide are passed through as empty strings.
                string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                string prjName   = Convert.ToString(dic["title"]);
                string infoid    = Convert.ToString(dic["infoid"]);
                string beginDate = Convert.ToString(dic["infodate"]);

                // The title starts with bracketed tags; the first bracketed text is taken as the area.
                string area = prjName.GetReplace("[,]", "kdxx").GetRegexBegEnd("kdxx", "kdxx");
                if (area.Contains("报名"))
                {
                    // A registration-status tag is not a region.
                    area = "";
                }
                prjName = prjName.GetReplace(string.Format("[{0}],[{1}],[{2}]", area, "正在报名", "报名结束"));
                if (prjName.Contains("[自治区]"))
                {
                    prjName = prjName.GetReplace("[自治区]");
                }

                string InfoUrl = string.Format("http://www.nxggzyjy.org/ningxia/WebbuilderMIS/RedirectPage/RedirectPage.jspx?infoid={0}&categorynum=002001001&locationurl=http://www.nxggzyjy.org/ningxiaweb", infoid);
                string htmldtl = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                }
                // A detail page that cannot be fetched only drops this one entry.
                catch { continue; }

                Parser   parser  = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "mainContent")));
                if (dtlNode == null || dtlNode.Count <= 0)
                {
                    continue;
                }

                string HtmlTxt   = dtlNode.AsHtml();
                string inviteCtx = HtmlTxt.ToLower().GetReplace("<br/>,<br>,</p>", "\r\n").ToCtxString();

                // The same space-stripped text feeds all three extractions below.
                // NOTE(review): hoisting assumes GetReplace has no side effects - confirm.
                string compactCtx = inviteCtx.GetReplace(" ");

                // Cut the extracted builder name at the first trailing label that leaked into it.
                string buildUnit = compactCtx.GetBuildRegex();
                if (buildUnit.Contains("招标代理"))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                }
                if (buildUnit.Contains("代理"))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("代理"));
                }
                if (buildUnit.Contains("联系"))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                }
                if (buildUnit.Contains("公司"))
                {
                    // Keep everything up to and including the first "公司".
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                }

                string code       = compactCtx.GetCodeRegex().GetCodeDel();
                string prjAddress = compactCtx.GetAddressRegex();
                string msgType    = "宁夏公共资源交易管理局";
                string specType   = "建设工程";
                string inviteType = prjName.GetInviteBidType();

                InviteInfo info = ToolDb.GenInviteInfo("宁夏回族自治区", "宁夏回族自治区及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);

                // Collect attachment links from the announcement body.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k] as ATag;
                        if (a == null || !a.IsAtagAttach())
                        {
                            continue;
                        }

                        // Links without "http" are treated as relative to the procurement site.
                        string link = a.Link.ToLower().Contains("http")
                            ? a.Link
                            : "http://www.nxzfcg.gov.cn/" + a.Link;

                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }
            return list;
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the bulletin list for category 002001003 and builds one
        /// <see cref="BidInfo"/> for every entry whose detail page contains a
        /// <c>div</c> with id <c>zbgsId</c>. Attachment links found inside that
        /// node are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records
        /// have been collected; when <c>true</c>, every returned entry is processed.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty (never <c>null</c>) when the site
        /// page or the bulletin list cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string urlFormt = "http://www.nxzfcg.gov.cn/ningxia/services/BulletinWebServer/getBulletinInfoList?response=application/json&pageIndex=1&pageSize={0}&siteguid={1}&categorynum=002001003&cityname=";
            IList  list     = new List <BidInfo>();
            string html     = string.Empty;

            // The landing page carries the "siteguid" input needed to query the list service.
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return list; }
            string siteId = ToolHtml.GetHtmlInputValueById(html, "siteguid");
            string url    = string.Format(urlFormt, this.MaxCount, siteId);

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(url);
            }
            // Return the empty list (not null) so both download failures look the same to callers.
            catch { return list; }

            // The service wraps the real payload as a JSON string under "return";
            // the payload's "Table" entry holds the announcement rows.
            // A malformed response still throws here, exactly as before.
            JavaScriptSerializer        serializer = new JavaScriptSerializer();
            Dictionary <string, object> envelope   = (Dictionary <string, object>)serializer.DeserializeObject(html);
            Dictionary <string, object> payload    = (Dictionary <string, object>)serializer.DeserializeObject(envelope["return"].ToString());
            object[] listDatas = (object[])payload["Table"];
            if (listDatas == null)
            {
                return list;
            }

            foreach (object data in listDatas)
            {
                Dictionary <string, object> dic = data as Dictionary <string, object>;
                if (dic == null)
                {
                    // Skip rows that are not JSON objects instead of failing on a null dereference.
                    continue;
                }

                // Fields the page does not provide are passed through as empty strings.
                string endDate = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

                string prjName   = Convert.ToString(dic["title"]);
                string infoid    = Convert.ToString(dic["infoid"]);
                string beginDate = Convert.ToString(dic["infodate"]);

                // The title starts with bracketed tags; the first bracketed text is taken as the area.
                string area = prjName.GetReplace("[,]", "kdxx").GetRegexBegEnd("kdxx", "kdxx");
                if (area.Contains("报名"))
                {
                    // A registration-status tag such as [正在报名] is not a region;
                    // drop it so it is not stored as the record's area.
                    area = "";
                }
                prjName = prjName.GetReplace(string.Format("[{0}],[{1}],[{2}]", area, "正在报名", "报名结束"));
                if (prjName.Contains("[自治区]"))
                {
                    prjName = prjName.GetReplace("[自治区]");
                }

                string InfoUrl = string.Format("http://www.nxggzyjy.org/ningxia/WebbuilderMIS/RedirectPage/RedirectPage.jspx?infoid={0}&categorynum=002001003&locationurl=http://www.nxggzyjy.org/ningxiaweb", infoid);
                string htmldtl = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                }
                // A detail page that cannot be fetched only drops this one entry.
                catch { continue; }

                Parser   parser  = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zbgsId")));
                // The bid information on the page has been changed to an image.
                // TODO: pending rework.
                if (dtlNode == null || dtlNode.Count <= 0)
                {
                    continue;
                }

                string HtmlTxt = dtlNode.AsHtml();
                string bidCtx  = HtmlTxt.ToLower().GetReplace("<br/>,<br>,</p>", "\r\n").ToCtxString();

                // NOTE(review): hoisting assumes GetReplace has no side effects - confirm.
                string compactCtx = bidCtx.GetReplace(" ");

                // Cut the extracted builder name at the first trailing label that leaked into it.
                string buildUnit = compactCtx.GetBuildRegex();
                if (buildUnit.Contains("招标代理"))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                }
                if (buildUnit.Contains("代理"))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("代理"));
                }
                if (buildUnit.Contains("联系"))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                }
                if (buildUnit.Contains("改革局"))
                {
                    // Keep everything up to and including the first "改革局".
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("改革局")) + "改革局";
                }

                string code     = compactCtx.GetCodeRegex().GetCodeDel();
                string bidMoney = bidCtx.GetMoneyRegex(null, false, "万元");
                string bidUnit  = bidCtx.GetBidRegex();
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = bidCtx.GetRegex("第一名,通过单位");
                }

                if (string.IsNullOrEmpty(bidUnit))
                {
                    // Fall back to reading the winner out of a bordered result table.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("border", "1")));

                    // NOTE(review): requires more than one table but only reads the first;
                    // possibly meant "> 0" - confirm before changing.
                    if (tableNode != null && tableNode.Count > 1)
                    {
                        TableTag table = tableNode[0] as TableTag;
                        if (table != null)
                        {
                            // First attempt: treat row 0 as headers and row 1 as values.
                            string ctx = string.Empty;
                            if (table.RowCount >= 2)
                            {
                                for (int c = 0; c < table.Rows[0].ColumnCount; c++)
                                {
                                    try
                                    {
                                        string temp      = table.Rows[0].Columns[c].ToNodePlainString();
                                        string tempValue = table.Rows[1].Columns[c].ToNodePlainString();
                                        ctx += temp + ":" + tempValue + "\r\n";
                                    }
                                    // Best effort: row 1 may have fewer cells than the header row.
                                    catch { }
                                }
                            }
                            bidUnit = ctx.GetBidRegex();
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = ctx.GetRegex("单位名称");
                            }

                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                // Second attempt: treat cells as alternating label/value pairs.
                                ctx = string.Empty;
                                for (int r = 0; r < table.RowCount; r++)
                                {
                                    for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = table.Rows[r].Columns[c].ToNodePlainString();
                                        ctx += (c % 2 == 0) ? temp + ":" : temp + "\r\n";
                                    }
                                }
                                bidUnit = ctx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("单位名称");
                                }
                                bidUnit = bidUnit.Replace("名称", "");
                            }

                            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = ctx.GetMoneyRegex(null, false, "万元");
                            }
                            prjMgr = ctx.GetMgrRegex();
                        }
                    }
                }

                // Values of 1,000,000 or more are divided by 10,000.
                // NOTE(review): presumably such values are in yuan and are being
                // converted to 万元 - confirm against GetMoneyRegex.
                // Text that is not a number is left untouched.
                decimal money;
                if (decimal.TryParse(bidMoney, out money) && money >= 1000000)
                {
                    bidMoney = (money / 10000).ToString();
                }

                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = bidCtx.GetRegexBegEnd("1、", "得分").GetReplace(",");
                }
                if (bidUnit.Contains("公司"))
                {
                    // Keep everything up to and including the first "公司".
                    bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                }

                string msgType  = "宁夏公共资源交易管理局";
                string specType = "建设工程";
                string bidType  = prjName.GetInviteBidType();

                // beginDate is passed twice, as in the original call.
                // NOTE(review): the second occurrence is presumably the bid date - confirm against GenBidInfo.
                BidInfo info = ToolDb.GenBidInfo("宁夏回族自治区", "宁夏回族自治区及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);

                // Collect attachment links from the announcement body.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k] as ATag;
                        if (a == null || !a.IsAtagAttach())
                        {
                            continue;
                        }

                        // Links without "http" are treated as relative to the procurement site.
                        string link = a.Link.ToLower().Contains("http")
                            ? a.Link
                            : "http://www.nxzfcg.gov.cn/" + a.Link;

                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }
            return list;
        }