Beispiel #1
0
        /// <summary>
        /// Crawls the paged bid listing at <c>SiteUrl</c>, follows each row to its detail page,
        /// extracts the announcement body and builds one <c>BidInfo</c> per announcement.
        /// Attachments found in the announcement body are appended to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the result list holds at least
        /// <c>MaxCount</c> entries; when <c>true</c>, every listed page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> objects. The list is empty when the first listing
        /// page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list   = new List<BidInfo>();
            string html   = string.Empty;
            string cookie = string.Empty;

            // NOTE(review): Encoding.Default is platform dependent; confirm the site's charset.
            try
            {
                html = ToolHtml.GetHtmlByUrlCookie(this.SiteUrl, Encoding.Default, ref cookie);
            }
            catch
            {
                // Best effort: without the first listing page there is nothing to crawl.
                return list;
            }

            // Read the total page count from the pager label; fall back to a single page.
            int      pageInt = 1;
            Parser   parser  = new Parser(new Lexer(html));
            NodeList sNode   = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_myGV_ctl23_LabelPageCount")));
            if (sNode != null && sNode.Count > 0)
            {
                // TryParse zeroes the out value on failure, so reset it; also never go below
                // one page because the first page has already been downloaded.
                if (!int.TryParse(sNode[0].ToNodePlainString(), out pageInt) || pageInt < 1)
                {
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Simulate a click on the "next page" link button by posting back the
                    // hidden ASP.NET state fields taken from the page currently held in html.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "ctl00$ContentPlaceHolder1$txtGcmc",
                        "ctl00$ContentPlaceHolder1$DDLGclx"
                    }, new string[] {
                        "ctl00$ContentPlaceHolder1$myGV$ctl23$LinkButtonNextPage",
                        "",
                        viewState,
                        "",
                        eventValidation,
                        "",
                        "全部类型"
                    });

                    // NOTE(review): values are concatenated without URL-encoding, exactly as
                    // before; confirm whether the helpers above/below already encode them.
                    StringBuilder post = new StringBuilder();
                    for (int n = 0; n < nvc.Count; n++)
                    {
                        if (post.Length > 0)
                        {
                            post.Append('&');
                        }
                        post.Append(nvc.AllKeys[n]).Append('=').Append(nvc[n]);
                    }

                    try
                    {
                        html = ToolHtml.GetHtmlGJByUrlPost(this.SiteUrl, post.ToString(), Encoding.Default, ref cookie);
                    }
                    catch
                    {
                        // html still holds the previous page; parsing it again would add
                        // duplicate records, so skip this iteration instead.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_myGV")));
                TableTag table = (viewList != null && viewList.Count > 0) ? viewList[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row of the grid are skipped (index 0 and RowCount - 1).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 5)
                    {
                        // Columns 0..4 are read below; ignore rows that do not have them.
                        continue;
                    }

                    string endDate   = string.Empty;
                    string otherType = string.Empty;
                    string code      = tr.Columns[0].ToNodePlainString();
                    string beginDate = tr.Columns[4].ToPlainTextString().GetDateRegex();
                    string bidType   = tr.Columns[2].ToNodePlainString();

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // No link to a detail page in this row.
                        continue;
                    }
                    string prjName = aTag.LinkText.ToNodeString().GetReplace(" ,[查看公告],[查看公示]");
                    string infoUrl = "http://www.hgggzy.com/ceinwz/" + aTag.Link;

                    string detailHtml;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default);
                    }
                    catch { continue; }

                    // The detail page links to a document; its HTML rendering is fetched
                    // through BestHtml.aspx using the link with "/doc/" removed.
                    parser = new Parser(new Lexer(detailHtml));
                    NodeList dtlNode     = null;
                    NodeList anchorNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")), true), new TagNameFilter("a")));
                    if (anchorNodes != null && anchorNodes.Count > 0)
                    {
                        // Pick the first anchor whose link contains ".doc".
                        // NOTE(review): when none matches, the last usable anchor is used,
                        // as in the original code; confirm that this fallback is intended.
                        ATag docTag = null;
                        for (int a = 0; a < anchorNodes.Count; a++)
                        {
                            ATag candidate = anchorNodes[a].GetATag();
                            if (candidate == null || candidate.Link == null)
                            {
                                continue;
                            }
                            docTag = candidate;
                            if (candidate.Link.Contains(".doc"))
                            {
                                break;
                            }
                        }

                        if (docTag != null)
                        {
                            string docUrl = "http://www.hgggzy.com/WordHtml/BestHtml.aspx?id=" + docTag.Link.GetReplace("/doc/");
                            try
                            {
                                detailHtml = this.ToolWebSite.GetHtmlByUrl(docUrl, Encoding.Default).GetJsString();
                            }
                            catch { continue; }
                            parser  = new Parser(new Lexer(detailHtml));
                            dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                        }
                    }
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        // No announcement body could be obtained for this row.
                        continue;
                    }

                    string htmlTxt = dtlNode.AsHtml().ToLower();
                    string bidCtx  = htmlTxt.GetReplace("</p>,</br>,<br>,</div>", "\r\n").ToCtxString();

                    // Cut the build unit at the "招标代理" marker when it is present.
                    string buildUnit = bidCtx.GetBuildRegex();
                    int agentIndex = buildUnit.IndexOf("招标代理", StringComparison.Ordinal);
                    if (agentIndex >= 0)
                    {
                        buildUnit = buildUnit.Remove(agentIndex);
                    }

                    string bidUnit, bidMoney, prjMgr;
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    TableTag dtlTable  = (tableNode != null && tableNode.Count > 0) ? tableNode[0] as TableTag : null;
                    if (dtlTable != null)
                    {
                        // Flatten the first table into "label:value" lines (row 0 skipped,
                        // stopping at the first row with fewer than two cells) and run the
                        // extraction regexes on that text.
                        StringBuilder ctxBuilder = new StringBuilder();
                        for (int r = 1; r < dtlTable.RowCount; r++)
                        {
                            if (dtlTable.Rows[r].ColumnCount < 2)
                            {
                                break;
                            }
                            ctxBuilder.Append(dtlTable.Rows[r].Columns[0].ToNodePlainString()).Append(":");
                            ctxBuilder.Append(dtlTable.Rows[r].Columns[1].ToNodePlainString()).Append("\r\n");
                        }
                        string ctx = ctxBuilder.ToString();

                        bidUnit = ctx.GetBidRegex();
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = ctx.GetBidRegex(new string[] { "中标候选人名称" });
                        }
                        bidMoney = ctx.GetMoneyRegex();
                        prjMgr   = ctx.GetMgrRegex();
                    }
                    else
                    {
                        // No table in the body: extract from the plain announcement text.
                        bidUnit  = bidCtx.GetBidRegex();
                        bidMoney = bidCtx.GetMoneyRegex();
                        prjMgr   = bidCtx.GetMgrRegex();
                    }

                    // Amounts below 1 are normalised to "0"; unparsable text is left unchanged.
                    decimal money;
                    if (decimal.TryParse(bidMoney, out money) && money < 1)
                    {
                        bidMoney = "0";
                    }

                    string msgType  = "黄冈市公共资源交易中心";
                    string specType = "建设工程";
                    BidInfo info = ToolDb.GenBidInfo("湖北省", "湖北省及地市", "黄冈市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
                    list.Add(info);

                    // Register every attachment link found in the announcement body.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList attachNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (attachNodes != null)
                    {
                        for (int k = 0; k < attachNodes.Count; k++)
                        {
                            ATag attachTag = attachNodes[k].GetATag();
                            if (attachTag == null || !attachTag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not contain "http" are treated as site-relative.
                            string attachUrl = attachTag.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0
                                ? attachTag.Link
                                : "http://www.hgggzy.com/" + attachTag.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(attachTag.LinkText, info.Id, attachUrl);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }