Пример #1
0
        /// <summary>
        /// Crawls the paginated list at <c>SiteUrl</c>, opens every entry's detail page and turns it
        /// into a <c>BidInfo</c>. Attachment links found inside the detail body are appended to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<BidInfo>();
            int pageInt = 1;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            Parser parser = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("align", "left")));
            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // Presumably the pager text carries the page total between "/" and "页".
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    int parsed;
                    if (int.TryParse(temp, out parsed) && parsed > 0)
                    {
                        pageInt = parsed;
                    }
                }
                catch
                {
                    // Best effort: an unreadable pager leaves the single-page default in place.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // NOTE(review): both values are extracted but never added to the posted form
                    // below - confirm whether the site needs them for paging. The calls are kept
                    // because the helpers' behavior is not visible from here.
                    string viewState = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);

                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "BigClass", "page" },
                        new string[] { "", i.ToString() });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default);
                    }
                    catch
                    {
                        // A page that fails to download is skipped; later pages are still attempted.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("dt"), new HasAttributeFilter("class", "ny_news")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    // buildUnit, endDate and otherType are never filled by this crawler; they are
                    // kept as named locals so the long GenBidInfo argument list stays readable.
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty, specType = string.Empty,
                           InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null || aTag.Link == null)
                    {
                        // An entry without a usable link cannot be followed; skip it instead of
                        // letting a NullReferenceException abort the whole crawl.
                        continue;
                    }

                    // Prefer the title attribute and fall back to the visible link text.
                    prjName = aTag.GetAttribute("title");
                    if (string.IsNullOrWhiteSpace(prjName))
                    {
                        prjName = aTag.LinkText;
                    }
                    beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                    InfoUrl = aTag.Link.Trim();

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                    }
                    catch (Exception)
                    {
                        // Detail page unavailable: skip this entry only.
                        continue;
                    }

                    Parser parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ny_wz")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtnode.AsHtml();
                    bidCtx = HtmlTxt.GetReplace("</p>,</br>", "\r\n").GetReplace("<br />", "\r\n").ToCtxString();

                    bidUnit = bidCtx.GetBidRegex();
                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegex("中标人");
                    }

                    bidMoney = bidCtx.GetMoneyRegex();
                    if (string.IsNullOrWhiteSpace(bidMoney))
                    {
                        bidMoney = bidCtx.GetRegex("中标价").GetMoney("万元");
                    }

                    // A code is only kept when it ends with '号'.
                    code = bidCtx.GetCodeRegex().GetCodeDel();
                    if (!string.IsNullOrWhiteSpace(code) && code[code.Length - 1] != '号')
                    {
                        code = "";
                    }

                    prjMgr = bidCtx.GetMgrRegex();

                    // Cut the bidder name right after the first "公司". One ordinal search is used for
                    // both the test and the cut so the index can never be -1 at the cut.
                    int companyAt = string.IsNullOrEmpty(bidUnit) ? -1 : bidUnit.IndexOf("公司", StringComparison.Ordinal);
                    if (companyAt >= 0)
                    {
                        bidUnit = bidUnit.Substring(0, companyAt) + "公司";
                    }

                    msgType = "海南省发展和改革委员会";
                    specType = "建设工程";
                    bidType = ToolHtml.GetInviteTypes(prjName);

                    BidInfo info = ToolDb.GenBidInfo("海南省", "海南省及地市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail body. A dedicated parser keeps the
                    // list-page parser variable untouched.
                    Parser attachParser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || a.Link == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that already contain "http" are used as they are; everything
                            // else is treated as relative to the site root.
                            string link = a.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0
                                ? a.Link
                                : "http://ztb.hainan.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Пример #2
0
        /// <summary>
        /// Walks the paginated list at <c>SiteUrl</c>, downloads every entry's detail page, builds a
        /// <c>BidInfo</c> from it and stores it through <c>ToolDb.SaveEntity</c>. When
        /// <c>SaveEntity</c> returns true, the images of the detail body are stored as attachments.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops once <c>MaxCount</c> records have been generated.
        /// </param>
        /// <returns>Always <c>null</c>; records are persisted instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // sqlCount counts generated records and is compared against MaxCount below.
            // NOTE(review): viewState and eventValidation are declared but never read in this method.
            int    sqlCount        = 0;
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // The first page could not be downloaded: nothing to crawl.
                return(null);
            }
            // Pager links: <a> elements below <div class="pagination">.
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination")), true), new TagNameFilter("a")));

            if (nodeList != null && nodeList.Count > 0)
            {
                try
                {
                    // The href of the last pager link is reduced to what follows its first "=":
                    // the prefix before "=" is removed, then the "=" itself, and the rest is
                    // parsed as the page count.
                    string temp      = nodeList[nodeList.Count - 1].GetATagHref();
                    string pageCount = temp.Replace(temp.Remove(temp.IndexOf("=")), "").Replace("=", "");
                    page = int.Parse(pageCount);
                }
                // Any failure (no "=", non-numeric remainder, ...) leaves page at 1.
                catch { }
            }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Pages after the first are requested with a ?page=N query string.
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?page=" + i.ToString(), Encoding.UTF8, ref cookiestr);
                    }
                    // A page that fails to download is skipped. NOTE(review): ex is unused.
                    catch (Exception ex) { continue; }
                }
                // List entries: <li> elements below <ul class="list">.
                parser = new Parser(new Lexer(htl));
                NodeList liNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list")), true), new TagNameFilter("li")));
                if (liNode != null && liNode.Count > 0)
                {
                    for (int j = 0; j < liNode.Count; j++)
                    {
                        // NOTE(review): bidDate and remark are never assigned or read again;
                        // endDate and otherType are only passed on as empty strings.
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               bidDate = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjAddress = string.Empty, remark = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        // The project name is the entry text with the date, spaces and "·" removed.
                        beginDate = liNode[j].ToPlainTextString().GetDateRegex("yyyy/MM/dd");
                        prjName   = liNode[j].ToPlainTextString().Replace(beginDate, "").ToNodeString().Replace(" ", "").Replace("·", "");
                        // NOTE(review): aTag is dereferenced without a null check - confirm GetATag never returns null.
                        ATag aTag = liNode[j].GetATag();
                        InfoUrl = "http://www.yjggzy.cn" + aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");
                        }
                        catch
                        {
                            // Detail page unavailable: skip this entry only.
                            continue;
                        }
                        // Detail body: <dl class="acticlecontent">, falling back to <div id="nr">.
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtlNode      = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("dl"), new HasAttributeFilter("class", "acticlecontent")));
                        if (dtlNode == null || dtlNode.Count < 1)
                        {
                            parserdetail.Reset();
                            dtlNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "nr")));
                        }
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.ToHtml();

                            // Repeated passes collapse consecutive line breaks (with or without a
                            // following tab) into a single one.
                            // NOTE(review): the number of passes is fixed, so a very long run of
                            // empty lines may not be collapsed completely.
                            bidCtx = HtmlTxt.ToCtxString().Replace("\r\n\t\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\t\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\t\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\t\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\t\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\r\n", "\r\n");
                            bidCtx = bidCtx.Replace("\r\n\r\n", "\r\n");
                            bidCtx = bidCtx.Replace("\r\n\r\n", "\r\n");
                            bidCtx = bidCtx.Replace("\r\n\r\n", "\r\n");
                            bidCtx = bidCtx.Replace("\r\n\r\n", "\r\n");
                            bidCtx = bidCtx.Replace("\r\n\t\r\n\t", "\r\n\t");
                            bidCtx = bidCtx.Replace("\r\n\r\n", "\r\n");
                            bidCtx = bidCtx.Replace("\r\n\t\r\n\t", "\r\n\t");

                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList dtlNodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            if (dtlNodeList != null && dtlNodeList.Count > 0)
                            {
                                // The first table of the detail body is flattened into ctx as
                                // "label:value" lines, which the regex helpers below then search.
                                // NOTE(review): tableTag is used without a null check after the as-cast.
                                string   ctx      = string.Empty;
                                TableTag tableTag = dtlNodeList[0] as TableTag;
                                foreach (TableRow row in tableTag.Rows)
                                {
                                    int colIndex = 0;
                                    foreach (TableColumn col in row.Columns)
                                    {
                                        // Rows with three cells: cell 0 contributes nothing, cell 1 is
                                        // written as a label followed by ":" (when its colspan is "2"
                                        // or absent) and cell 2 as a value followed by a line break.
                                        if (row.Columns.Length == 3)
                                        {
                                            if (colIndex == 0 && col.GetAttribute("colspan") != "2")
                                            {
                                                colIndex++;
                                                continue;
                                            }
                                            else if (col.GetAttribute("colspan") == "2" && colIndex == 1)
                                            {
                                                ctx += col.ToNodePlainString() + ":";
                                            }
                                            else if (!string.IsNullOrEmpty(col.GetAttribute("colspan")) && colIndex == 2)
                                            {
                                                ctx += col.ToNodePlainString() + "\r\n";
                                            }
                                            else if (string.IsNullOrEmpty(col.GetAttribute("colspan")) && colIndex == 1)
                                            {
                                                ctx += col.ToNodePlainString() + ":";
                                            }
                                            else if (string.IsNullOrEmpty(col.GetAttribute("colspan")) && colIndex == 2)
                                            {
                                                ctx += col.ToNodePlainString() + "\r\n";
                                            }
                                            colIndex++;
                                            continue;
                                        }
                                        // Rows with two cells: cell 0 is the label, cell 1 the value.
                                        if (row.Columns.Length == 2)
                                        {
                                            if (colIndex == 0)
                                            {
                                                ctx += col.ToNodePlainString() + ":";
                                            }
                                            else if (colIndex == 1)
                                            {
                                                ctx += col.ToNodePlainString() + "\r\n";
                                            }
                                            colIndex++;
                                            continue;
                                        }
                                        // All other rows: without colspan="2" cell 0 is skipped, cell 1
                                        // is the label and cell 2 the value; a first cell with
                                        // colspan="2" is itself the label and the cell after it
                                        // (when it carries a colspan) the value.
                                        if (colIndex == 0 && col.GetAttribute("colspan") != "2")
                                        {
                                            colIndex++;
                                            continue;
                                        }
                                        else if (colIndex == 1 && col.GetAttribute("colspan") != "2")
                                        {
                                            ctx += col.ToNodePlainString() + ":";
                                        }
                                        else if (colIndex == 2 && col.GetAttribute("colspan") != "2")
                                        {
                                            ctx += col.ToNodePlainString() + "\r\n";
                                        }
                                        else if (col.GetAttribute("colspan") == "2" && colIndex == 0)
                                        {
                                            ctx += col.ToNodePlainString() + ":";
                                        }

                                        else if (!string.IsNullOrEmpty(col.GetAttribute("colspan")) && colIndex == 1)
                                        {
                                            ctx += col.ToNodePlainString() + "\r\n";
                                        }

                                        colIndex++;
                                    }
                                }

                                // Field extraction runs on the flattened table text, not on bidCtx.
                                // NOTE(review): prjAddress is assigned here but never passed on.
                                buildUnit  = ctx.GetBuildRegex();
                                bidUnit    = ctx.GetBidRegex();
                                code       = ctx.GetCodeRegex();
                                prjAddress = ctx.GetAddressRegex();
                                prjMgr     = ctx.GetMgrRegex();
                                if (string.IsNullOrEmpty(prjMgr))
                                {
                                    prjMgr = ctx.GetRegex("项目负责人姓名", true, 50);
                                }
                                bidMoney = ctx.GetMoneyRegex();
                            }
                            else
                            {
                                // No table in the body: the src of the first image is prefixed with
                                // the site host inside HtmlTxt.
                                // NOTE(review): only the first image is rewritten, and a src that is
                                // already absolute would be prefixed as well - confirm against real pages.
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                if (imgNode != null && imgNode.Count > 0)
                                {
                                    ImageTag img  = imgNode[0] as ImageTag;
                                    string   link = "http://www.yjggzy.cn" + img.GetAttribute("src");
                                    HtmlTxt = HtmlTxt.GetReplace(img.GetAttribute("src"), link);
                                }
                            }
                            msgType  = "阳江市建设工程交易中心";
                            specType = "建设工程";
                            bidType  = ToolHtml.GetInviteTypes(prjName);
                            BidInfo info = ToolDb.GenBidInfo("广东省", "阳江市区", "", string.Empty, code, prjName, buildUnit, beginDate,
                                                             bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                             bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            // Counted before the save, so records rejected by SaveEntity count too.
                            sqlCount++;
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                            {
                                // Every image of the body is turned into an attachment
                                // (aNode holds <img> tags here, despite its name).
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int a = 0; a < aNode.Count; a++)
                                    {
                                        ImageTag img = aNode[a] as ImageTag;
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(img.GetAttribute("src"), prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                            }
                                        }
                                        // Best effort: a failing image does not stop the crawl.
                                        catch { }
                                    }
                                }
                            }
                            // Stop early once enough records were generated, unless a full crawl was requested.
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                        }
                    }
                }
            }
            return(null);
        }
Пример #3
0
        /// <summary>
        /// Crawls the paginated notice list at <c>SiteUrl</c>, downloads each entry's detail page,
        /// builds a <c>NotifyInfo</c> from it and stores it through <c>ToolDb.SaveEntity</c>. When
        /// <c>SaveEntity</c> returns true, attachment links found in the detail page's
        /// <c>Table1</c> table are stored as well.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops once <c>MaxCount</c> records have been generated.
        /// </param>
        /// <returns>Always <c>null</c>; records are persisted instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int pageInt = 1, sqlCount = 0;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return null;
            }

            // Determine the page count from the pager element.
            Parser parser = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dataPager")));
            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList.AsString().GetRegexBegEnd("共有:", "页");
                    // TryParse rather than Convert.ToInt32: the latter maps a null string to 0,
                    // which would silently skip the whole crawl instead of falling back to one page.
                    int parsed;
                    if (int.TryParse(temp, out parsed) && parsed > 0)
                    {
                        pageInt = parsed;
                    }
                }
                catch
                {
                    // Best effort: an unreadable pager means a single page.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Paging is an ASP.NET postback: the view state of the page currently held
                        // is posted back together with the target page number.
                        string viewState = this.ToolWebSite.GetAspNetViewState(html);
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "searcher:txtKeyWord", "searcher:tcInputDateTime:txtDateTime1",
                            "searcher:tcInputDateTime:txtDateTime2", "searcher:ddlProvince", "searcher:ddlCity1", "searcher:ddlCity2"
                        }, new string[] {
                            "dataPager", i.ToString(), viewState, "", "", "", "-1", "-1", "-1"
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch
                    {
                        // A page that fails to download is skipped; html keeps the last good page.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    // Checked before any download so that no detail page is fetched for a record
                    // that would be discarded anyway.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return null;
                    }

                    // infoScorce is never filled by this crawler and is passed on empty.
                    string infoScorce = string.Empty;
                    string infoType = "办事指南";
                    string releaseTime = nodeList[j].ToPlainTextString().GetDateRegex();
                    string infoUrl = nodeList[j].GetATagHref();

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this entry only.
                        continue;
                    }

                    Parser detailParser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "crt fr")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = dtlList.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = MsgTypeCosnt.ShaoGuanMsgType;
                    // The title is taken from the detail text: the line following "列表".
                    string headName = infoCtx.GetRegexBegEnd("列表\r\n", "\r\n");
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "韶关市区", string.Empty, infoCtx, infoType);

                    // Counted before the save, so records rejected by SaveEntity count too.
                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Attachment links live in the table(s) with id "Table1"; when more than one
                    // matches, only the second is searched, otherwise the single match.
                    Parser attachParser = new Parser(new Lexer(htldtl));
                    NodeList tabNode = attachParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table1")));
                    NodeList aNode = null;
                    if (tabNode != null && tabNode.Count > 1)
                    {
                        attachParser = new Parser(new Lexer(tabNode[1].ToHtml()));
                        aNode = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    }
                    else if (tabNode != null && tabNode.Count > 0)
                    {
                        attachParser = new Parser(new Lexer(tabNode.AsHtml()));
                        aNode = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    }
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag aTag = aNode[a] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }
                        try
                        {
                            // Relative prefixes are stripped and the link is resolved against the site root.
                            BaseAttach obj = ToolHtml.GetBaseAttach("http://www.sgjsj.gov.cn/sgwebims/" + aTag.Link.Replace("../", "").Replace("./", ""), aTag.LinkText, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: a failing attachment does not stop the crawl.
                        }
                    }
                }
            }
            return null;
        }
Пример #4
0
        /// <summary>
        /// Crawls the paged <c>GridView1</c> table at <c>SiteUrl</c>, builds one <c>NotifyInfo</c>
        /// per data row from the row's own cells (no detail page is downloaded) and stores it
        /// through <c>ToolDb.SaveEntity</c>. When <c>SaveEntity</c> returns true, an attachment is
        /// built from the row's link.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops once <c>MaxCount</c> records have been generated.
        /// </param>
        /// <returns>Always <c>null</c>; records are persisted instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int pageInt = 1, sqlCount = 0;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return null;
            }

            // Determine the page count: the number of cells in the first row of the pager table
            // (presumably one cell per page link - TODO confirm against the live page).
            Parser parser = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("valign", "top")), true), new TagNameFilter("table")));
            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    TableTag pagerTable = pageList[0] as TableTag;
                    if (pagerTable != null && pagerTable.RowCount > 0 && pagerTable.Rows[0].ColumnCount > 0)
                    {
                        pageInt = pagerTable.Rows[0].ColumnCount;
                    }
                }
                catch
                {
                    // Best effort: an unreadable pager means a single page.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Paging is an ASP.NET GridView postback ("Page$N") that needs the view state
                        // and event validation of the page currently held.
                        string viewState = this.ToolWebSite.GetAspNetViewState(html);
                        string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__EVENTVALIDATION", "sel", "beginDate", "endDate", "infotitle"
                        }, new string[] {
                            "GridView1", "Page$" + i.ToString(), viewState, eventValidation, "1", "", "", ""
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // A page that fails to download is skipped; html keeps the last good page.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row are not data rows (presumably header and pager).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return null;
                    }

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 4)
                    {
                        // Columns 1-3 are read below; a shorter row is skipped instead of aborting
                        // the whole crawl with an IndexOutOfRangeException.
                        continue;
                    }

                    string infoType = "办事指南";
                    string headName = tr.Columns[1].ToNodePlainString();
                    string infoScorce = tr.Columns[2].ToNodePlainString();
                    string releaseTime = tr.Columns[3].ToPlainTextString().GetDateRegex();

                    string infoUrl = "http://www.szjsjy.com.cn/" + tr.Columns[1].GetATagHref().Replace("../", "");
                    // The record body is synthesized from the row itself.
                    string ctxHtml = "<p>信息标题:" + headName + "<br/>信息来源:" + infoScorce + "<br/>发布时间:" + releaseTime + "</p>";
                    string infoCtx = "信息标题:" + headName + "\r\n信息来源:" + infoScorce + "\r\n发布时间:" + releaseTime + "\r\n";
                    string msgType = MsgTypeCosnt.ShenZhenMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "深圳市工程", string.Empty, infoCtx, infoType);

                    // Counted before the save, so records rejected by SaveEntity count too.
                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    try
                    {
                        // The linked document itself becomes the record's attachment.
                        BaseAttach obj = ToolHtml.GetBaseAttach(infoUrl, headName, info.Id);
                        if (obj != null)
                        {
                            ToolDb.SaveEntity(obj, string.Empty);
                        }
                    }
                    catch
                    {
                        // Best effort: a failing attachment does not stop the crawl.
                    }
                }
            }
            return null;
        }
Пример #5
0
        /// <summary>
        /// Walks the paged listing at <c>SiteUrl</c>, downloads the detail page of every entry and
        /// turns the text found there into a <c>BidInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first listing page cannot be
        /// downloaded. Attachments linked from a detail page are added to <c>base.AttachList</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://xzedu.zhuhai.gov.cn/";

            IList  list    = new List <BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "cNavBar_cTotalPages")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    pageInt = int.Parse(sNode[0].ToNodePlainString());
                }
                catch
                {
                    // An unreadable page total degrades to crawling the first page only.
                    pageInt = 1;
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the form state of the page fetched last.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION",
                        "cSortField",
                        "cSortDirection",
                        "cID",
                        "cParentID",
                        "cLeft:cParentID",
                        "cLeft:cID",
                        "cNavBar:cPageIndex"
                    }, new string[] {
                        viewState,
                        "8A9C3F4D",
                        eventValidation,
                        "",
                        "",
                        "1080200",
                        "1080000",
                        "1080000",
                        "1080200",
                        i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list")), true), new TagNameFilter("li")));

                if (viewList == null || viewList.Count == 0)
                {
                    continue;
                }
                for (int j = 0; j < viewList.Count; j++)
                {
                    string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, beginDate = string.Empty, prjName = string.Empty, InfoUrl = string.Empty;

                    ATag aTag = viewList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link has no detail page to follow.
                        continue;
                    }
                    beginDate = viewList[j].ToPlainTextString().GetDateRegex();
                    prjName   = aTag.GetAttribute("title");
                    InfoUrl   = siteRoot + aTag.Link.GetReplace("./");
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news_view_main")), true), new TagNameFilter("li")));
                    if (dtl == null || dtl.Count <= 1)
                    {
                        continue;
                    }

                    // The second <li> is taken as the article body. It is lower-cased once here, so
                    // every later use of HtmlTxt (including extracted links) is already lower case.
                    HtmlTxt = dtl[1].ToHtml().ToLower();
                    bidCtx  = HtmlTxt.GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                    string src = string.Empty;
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (tableNode != null && tableNode.Count > 0)
                    {
                        TableTag tag = tableNode[0] as TableTag;
                        if (tag != null && tag.RowCount > 1)
                        {
                            // Pair each cell of row 0 with the cell below it as "header:value" lines.
                            string ctx = string.Empty;
                            try
                            {
                                for (int r = 0; r < tag.Rows[0].ColumnCount; r++)
                                {
                                    ctx += tag.Rows[0].Columns[r].ToNodePlainString() + ":";
                                    ctx += tag.Rows[1].Columns[r].ToNodePlainString() + "\r\n";
                                }
                            }
                            catch
                            {
                                // Row 1 may have fewer cells than row 0; keep what was paired so far.
                            }
                            bidUnit = ctx.GetBidRegex().GetReplace("中标(成交)");
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = ctx.GetRegex("投标单位");
                            }
                            bidMoney = ctx.GetMoneyRegex();
                            prjMgr   = ctx.GetMgrRegex();
                        }
                    }
                    else
                    {
                        Parser   imgParser = new Parser(new Lexer(HtmlTxt));
                        NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));

                        if (imgNode != null && imgNode.Count > 0)
                        {
                            ImageTag img = imgNode[0] as ImageTag;
                            if (img != null)
                            {
                                // Rewrite the first image to an absolute URL and remember it as an attachment source.
                                string imgUrl = img.GetAttribute("src");
                                src     = siteRoot + imgUrl;
                                HtmlTxt = HtmlTxt.GetReplace(imgUrl, src);
                            }
                        }
                        bidUnit = bidCtx.GetBidRegex().GetReplace("中标(成交)");
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = bidCtx.GetRegex("中标(成交)供应商名称");
                        }
                        bidMoney = bidCtx.GetMoneyRegex(new string[] { "中标(成交)候选人投标报价" });
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = bidCtx.GetMoneyRegex();
                        }
                        prjMgr = bidCtx.GetMgrRegex();
                    }
                    buildUnit = bidCtx.GetBuildRegex();
                    code      = bidCtx.GetCodeRegex().GetCodeDel();
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    if (buildUnit.Contains("招标代理"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                    }

                    // Amounts below 1 are zeroed; amounts above 100000 are divided by 10000.
                    // Text that is not a number is left untouched.
                    decimal money;
                    if (decimal.TryParse(bidMoney, out money))
                    {
                        if (money < 1)
                        {
                            bidMoney = "0";
                        }
                        else if (money > 100000)
                        {
                            bidMoney = (money / 10000).ToString();
                        }
                    }

                    if (prjMgr.Contains("资格"))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("资格"));
                    }

                    specType = "政府采购";
                    bidType  = prjName.GetInviteBidType();
                    msgType  = "珠海市香洲区教育局";
                    BidInfo info = ToolDb.GenBidInfo("广东省", "珠海市区", "香洲区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!string.IsNullOrEmpty(src))
                    {
                        // InfoUrl comes from a scraped page: double any single quote so it cannot
                        // terminate the SQL string literal.
                        string safeUrl = (info.InfoUrl ?? string.Empty).Replace("'", "''");
                        string sql     = string.Format("select Id from BidInfo where InfoUrl='{0}'", safeUrl);
                        object obj     = ToolDb.ExecuteScalar(sql);
                        if (obj == null || obj.ToString() == "")
                        {
                            try
                            {
                                BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                if (attach != null)
                                {
                                    ToolDb.SaveEntity(attach, "");
                                }
                            }
                            catch
                            {
                                // Best effort: a failed image download must not abort the crawl.
                            }
                        }
                    }
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k].GetATag();
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            string link = a.Link.ToLower().Contains("http") ? a.Link : siteRoot + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #6
0
        /// <summary>
        /// Reads the notice grid on <c>SiteUrl</c>, downloads each notice's detail page and saves it
        /// as a <c>NotifyInfo</c> through <c>ToolDb.SaveEntity</c>, together with any attachments
        /// listed on the detail page.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops once <c>MaxCount</c> notices have been processed.
        /// </param>
        /// <returns>Always <c>null</c>; records are persisted directly instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.dgzb.com.cn/DGJYWEB/SiteManage/";

            // Determine the number of pages.
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_GridViewPaingTwo1_lblGridViewPagingDesc")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList.AsString().GetRegexBegEnd("共", "页");
                    pageInt = Convert.ToInt32(temp);
                }
                catch
                {
                    // An unreadable page total degrades to a single page.
                    pageInt = 1;
                }
            }
            // NOTE(review): `html` is never re-fetched inside this loop, so every iteration parses
            // the first page again. The postback needed to request page i is not visible in this
            // method and still has to be added.
            for (int i = 1; i <= pageInt; i++)
            {
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_GridView1")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }
                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                // Row 0 is skipped (treated as the header row).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Rows without a title and a date cell (e.g. a pager row) carry no notice.
                        continue;
                    }
                    infoType    = "办事指南";
                    headName    = tr.Columns[1].ToNodePlainString();
                    releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    infoUrl     = siteRoot + tr.Columns[1].GetATagHref();
                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = ToolHtml.GetHtmlByUrlEncode(infoUrl, Encoding.UTF8);
                    }
                    catch
                    {
                        // No detail page means no content to store for this row.
                        continue;
                    }
                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "line")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }
                    ctxHtml = dtlList.AsHtml();
                    infoCtx = dtlList.AsString();
                    msgType = MsgTypeCosnt.DongGuanMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "东莞市区", string.Empty, infoCtx, infoType);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Attachments are only collected for notices that were saved just now.
                    parser = new Parser(new Lexer(htldtl));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_DownLoadFiles1_GridView1")));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }
                    TableTag tab = aNode[0] as TableTag;
                    if (tab == null)
                    {
                        continue;
                    }
                    for (int a = 1; a < tab.RowCount; a++)
                    {
                        TableRow dr = tab.Rows[a];
                        if (dr.ColumnCount < 2)
                        {
                            continue;
                        }
                        ATag aTag = dr.Columns[1].GetATag();
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }
                        try
                        {
                            BaseAttach obj = ToolHtml.GetBaseAttach(siteRoot + aTag.Link, aTag.LinkText, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: one failed attachment must not abort the crawl.
                        }
                    }
                }
            }
            return(null);
        }
Пример #7
0
        /// <summary>
        /// Crawls the paged article list of the Huizhou trading-centre site, downloads every
        /// article and extracts address, building unit and project code from its plain text to
        /// build an <c>InviteInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The pager text contains ".../N页"; N is the page total.
                    string temp = sNode.AsString().Replace(" ", "");
                    Regex  reg  = new Regex(@"/[^页]+页");
                    pageInt = Convert.ToInt32(reg.Match(temp).Value.Replace("/", "").Replace("页", ""));
                }
                catch { pageInt = 1; }
            }

            // The patterns do not depend on the row, so they are built once instead of once per row.
            Regex regDate      = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
            Regex regexHtml    = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");
            Regex regPrjAddr   = new Regex(@"(工程位置|工程地点|工程地址|详细地址|地点|地址)(:|:)[^\r\n]+\r\n");
            Regex regBuildUnit = new Regex(@"(招标代理机构|招标单位|招标人|招标单位(盖章))(:|:)[^\r\n]+\r\n");
            Regex regPrjCode   = new Regex(@"(工程编号|项目编号|编号)(:|:)[^\r\n]+\r\n");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://zyjy.huizhou.gov.cn/pages/cms/hzggzyjyzx/html/artList.html?cataId=54f6d9f3580843d59b9dd64918e7ae4f&pageNo=" + i.ToString(), Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "div_list"))), new TagNameFilter("ul")));
                if (viewList == null || viewList.Count == 0)
                {
                    continue;
                }

                // Links are matched to rows by position (j-th link belongs to the j-th <ul>), so the
                // search only needs to run once per page.
                NodeList anchors = viewList.SearchFor(typeof(ATag), true);

                for (int j = 0; j < viewList.Count; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    string rowText = viewList[j].ToPlainTextString();
                    beginDate = regDate.Match(rowText).Value;
                    prjName   = rowText.Replace("\r", "").Replace("\n", "").Replace(beginDate, "");

                    ATag aTag = (anchors != null && j < anchors.Count) ? anchors[j] as ATag : null;
                    if (aTag == null)
                    {
                        // No link for this row: there is no article to download.
                        continue;
                    }
                    InfoUrl = "http://zyjy.huizhou.gov.cn" + aTag.Link;
                    string htmDtl = string.Empty;
                    try
                    {
                        htmDtl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8);
                        htmDtl = regexHtml.Replace(htmDtl, "");
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "divZoom")));
                    if (dtl == null || dtl.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt   = Regex.Replace(dtl.ToHtml(), "(<script)[\\s\\S]*?(</script>)", "");
                    inviteCtx = Regex.Replace(HtmlTxt, "(<script)[\\s\\S]*?(</script>)", "");
                    // Strip the remaining tags, then collapse blank lines. The repeated replacements
                    // are kept in their original order because each pass only halves a run of breaks.
                    inviteCtx = Regex.Replace(inviteCtx, "<[^>]*>", "")
                                .Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n")
                                .Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n")
                                .Replace("\r\r\n", "\r\n").Replace("\r\r\n", "\r\n")
                                .Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n")
                                .Replace("\t", "")
                                .Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                    prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程位置", "").Replace("工程地点", "").Replace("工程地址", "").Replace("详细地址", "").Replace("地点", "").Replace("地址", "").Replace(":", "").Replace(":", "").Trim();
                    buildUnit  = regBuildUnit.Match(inviteCtx).Value.Replace("招标代理机构", "").Replace("招标单位", "").Replace("招标人", "").Replace("(盖章)", "").Replace(":", "").Replace(":", "").Trim();
                    if (buildUnit.Contains("资质"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("资质"));
                    }
                    prjAddress = ToolHtml.GetSubString(prjAddress, 150);
                    buildUnit  = ToolHtml.GetSubString(buildUnit, 150);
                    code       = regPrjCode.Match(inviteCtx).Value.Replace("工程编号", "").Replace("项目编号", "").Replace("编号", "").Replace(":", "").Replace(":", "").Trim();
                    msgType    = "惠州市公共资源交易中心";
                    specType   = "建设工程";
                    inviteType = ToolHtml.GetInviteTypes(prjName);
                    // Length limits are checked in bytes of the default encoding.
                    if (string.IsNullOrEmpty(prjAddress) || Encoding.Default.GetByteCount(prjAddress) > 150)
                    {
                        prjAddress = "见招标信息";
                    }
                    if (Encoding.Default.GetByteCount(code) > 50)
                    {
                        code = "";
                    }
                    inviteType = ToolHtml.GetInviteType(inviteType);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }
Пример #8
0
        /// <summary>
        /// Crawls the paged article table of the Zhanjiang trading-centre site, downloads every
        /// article and extracts the building unit and project address from its text to build an
        /// <c>InviteInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "dnn_ctr513_ArticleList_cboPages")));

            if (nodeList != null && nodeList.Count > 0)
            {
                // The page total is the number in the last "第N页" entry of the page selector.
                // A missing marker or non-numeric text keeps the single-page default instead of throwing.
                string oo     = nodeList.AsString().Trim();
                int    marker = oo.LastIndexOf("第");
                int    parsed;
                if (marker >= 0 && int.TryParse(oo.Substring(marker).Replace("第", "").Replace("页", "").Trim(), out parsed))
                {
                    page = parsed;
                }
            }

            // The patterns do not depend on the row, so they are built once instead of once per row.
            Regex regBuidUnit  = new Regex(@"(招标单位|招标人|招  标 单 位):[^\r\n]+\r\n");
            Regex regBuidUnitT = new Regex(@"招 标 单 位: [^\r\n]+\r\n");
            Regex regPrjAddr   = new Regex(@"(工程地点|工程地址|地 址|工  程 地 点)(:|:)[^\r\n]+\r\n");
            Regex regPrjAddrT  = new Regex(@"工 程 地 点: [^\r\n]+\r\n");

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the form state of the page fetched last.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    // NOTE(review): the event-validation token is posted under the "__dnnVariable"
                    // key and the selector value is i - 1 — confirm both against the live form.
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "dnn$ctr513$ArticleList$cboPages",
                        "ScrollTop",
                        "__dnnVariable"
                    }, new string[] {
                        "dnn$ctr513$ArticleList$cboPages",
                        string.Empty,
                        string.Empty,
                        viewState,
                        (i - 1).ToString(),
                        "716",
                        eventValidation
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch (Exception) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dnn_ctr513_ArticleList_PanelA")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }
                NodeList tables = tableNodeList.SearchFor(typeof(TableTag), true);
                TableTag table  = (tables != null && tables.Count > 0) ? tables[0] as TableTag : null;
                if (table == null)
                {
                    // The panel holds no table on this page: nothing to read.
                    continue;
                }
                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    prjName   = tr.Columns[0].ToPlainTextString().Trim();
                    beginDate = tr.Columns[1].ToPlainTextString().Trim();

                    NodeList links = tr.Columns[0].SearchFor(typeof(ATag), true);
                    ATag     aTag  = (links != null && links.Count > 0) ? links[0] as ATag : null;
                    if (aTag == null)
                    {
                        // A row without a link has no article to download.
                        continue;
                    }

                    InfoUrl = "http://zb.zjcic.net" + aTag.Link.Replace("amp;", "").Trim();
                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("&nbsp;", "");
                    }
                    catch (Exception)
                    {
                        Logger.Error("InviteZhanJiangJSTwo");
                        continue;
                    }
                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "dnn_ctr377_ArticleShow_lblContent")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt   = dtnode.AsHtml();
                    inviteCtx = dtnode.AsString().Trim().Replace("&#160;", "").Trim();

                    // Try the compact label spellings first, then the spaced-out variant.
                    buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("招标单位:", "").Replace("招  标 单 位:", "").Replace(":", "").Replace("&#160;", "").Trim();
                    if (buildUnit == "")
                    {
                        buildUnit = regBuidUnitT.Match(inviteCtx).Value.Replace("招 标 单 位: ", "").Replace("&#160;", "").Trim();
                    }
                    prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址", "").Replace("地 址", "").Replace("工  程 地 点:", "").Replace(":", "").Trim();
                    if (prjAddress == "")
                    {
                        prjAddress = regPrjAddrT.Match(inviteCtx).Value.Replace("工 程 地 点: ", "").Trim();
                    }
                    msgType  = "湛江市建设工程交易中心";
                    specType = "建设工程";
                    if (prjAddress == "")
                    {
                        prjAddress = "见招标信息";
                    }
                    prjName    = prjName.Replace("·", "");
                    inviteType = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "湛江市区", "",
                                                           string.Empty, code, prjName, prjAddress, buildUnit,
                                                           beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #9
0
        /// <summary>
        /// Crawls the paged bid-result list at <c>SiteUrl</c>, downloads the detail page linked from every
        /// list row and turns each detail page into a <c>BidInfo</c> record (message type
        /// "珠海市建设工程交易中心").
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first list page cannot be downloaded;
        /// it is never <c>null</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a")));

            try
            {
                // The page count is read from the first number in the URL of the last anchor inside div.scott
                // (presumably the "last page" link -- confirm against the live pager markup).
                Regex numpage = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                ATag  link    = (ATag)nodeList[nodeList.Count - 1];
                page = Convert.ToInt32(numpage.Match(link.Link).Value.Trim());
            }
            catch (Exception)
            {
                // Best effort: without a usable pager link only the first page is crawled.
            }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    // NOTE(review): both values are extracted but never added to the POST data below;
                    // the calls are kept unchanged in case the helpers matter to the site session.
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "newtitle",
                        "totalRows",
                        "pageNO"
                    }, new string[] {
                        string.Empty,
                        "0",
                        i.ToString()
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr).Replace("<th", "<td").Replace("</th>", "</td>").Replace("&nbsp;", "");
                    }
                    catch (Exception) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "cnewslist")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = (TableTag)tableNodeList[0];
                    // The first row and the last two rows of the list table are skipped.
                    for (int j = 1; j < table.RowCount - 2; j++)
                    {
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount < 2)
                        {
                            // A row without both a title cell and a date cell cannot be a list entry.
                            continue;
                        }
                        ATag aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[0] as ATag;
                        if (aTag == null)
                        {
                            // No detail link in the title cell: nothing to download for this row.
                            continue;
                        }
                        prjName   = tr.Columns[0].ToPlainTextString().Trim();
                        beginDate = tr.Columns[1].ToPlainTextString().Trim();
                        InfoUrl   = aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("<th", "<td").Replace("</th>", "</td>").Replace("</TH>", "</td>").Replace("<TH", "<td").Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            Logger.Error("BidZhuHaiJS");
                            continue;
                        }

                        // Locate the detail content by trying several known page layouts in turn.
                        // htmlBool stays true while the result is expected to be a table.
                        bool     htmlBool     = true;
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "maintable")));
                        if (dtnode.Count <= 0)
                        {
                            parserdetail = new Parser(new Lexer(htmldetail));
                            dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "printTb")), true), new TagNameFilter("table")));
                        }
                        if (dtnode.Count <= 0)
                        {
                            parserdetail = new Parser(new Lexer(htmldetail));
                            dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "printTb")), true), new TagNameFilter("p")));
                            htmlBool     = false;
                        }
                        if (dtnode.Count <= 0)
                        {
                            parserdetail = new Parser(new Lexer(htmldetail));
                            dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "fwinProjectForHand"), new TagNameFilter("div")));
                        }
                        if (dtnode.Count > 0)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            if (htmlBool)
                            {
                                // Flatten the table into "label:value" lines by pairing adjacent cells.
                                TableTag tabletwo = (TableTag)dtnode[0];
                                for (int row = 0; row < tabletwo.RowCount; row++)
                                {
                                    TableRow r = tabletwo.Rows[row];
                                    for (int k = 0; k < r.ColumnCount; k += 2)
                                    {
                                        string st  = r.Columns[k].ToPlainTextString().Trim();
                                        string st1 = string.Empty;
                                        if (k + 1 < r.ColumnCount)
                                        {
                                            st1 = r.Columns[k + 1].ToPlainTextString().Trim();
                                        }
                                        bidCtx += st + ":" + st1 + "\r\n";
                                    }
                                }
                            }
                            else
                            {
                                for (int k = 0; k < dtnode.Count; k++)
                                {
                                    bidCtx += dtnode[k].ToPlainTextString() + "\r\n";
                                }
                            }
                            bidCtx = bidCtx.Replace("(单价)", "").Trim();

                            // The end date is the second date of the publication period line.
                            Regex regendDate = new Regex(@"(公告发布时间|公示日期):[^\r\n]+[\r\n]{1}");
                            endDate = regendDate.Match(bidCtx).Value.Replace("公告发布时间:", "").Replace("公示日期:", "").Trim();
                            string date = endDate.Replace(" ", "").Trim();
                            // Each entry is { pattern, separator to strip }; the first pattern that matches wins.
                            string[,] endDateFormats = new string[,] {
                                { @"至\d{4}-\d{1,2}-\d{1,2}", "至" },
                                { @"--\d{4}-\d{1,2}-\d{1,2}", "--" },
                                { @"至\d{4}年\d{1,2}月\d{1,2}日", "至" },
                                { @"--\d{4}年\d{1,2}月\d{1,2}日", "--" },
                                { @"-\d{4}年\d{1,2}月\d{1,2}日", "-" }
                            };
                            endDate = string.Empty;
                            for (int f = 0; f < endDateFormats.GetLength(0) && endDate == ""; f++)
                            {
                                endDate = Regex.Match(date, endDateFormats[f, 0]).Value.Replace(endDateFormats[f, 1], "").Trim();
                            }

                            Regex regBidUnit = new Regex(@"(第一中标候选人|中标人|中标单位)(:|:)[^\r\n]+\r\n");
                            bidUnit = regBidUnit.Match(bidCtx).Value.Replace("第一中标候选人", "").Replace("中标人:", "").Replace("中标单位:", "").Replace(":", "").Replace(":", "").Trim();
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                Regex regBidUnit1 = new Regex(@"(第一中标候选人|中标人|中标单位)[^\r\n]+\r\n");
                                bidUnit = regBidUnit1.Match(bidCtx).Value.Replace("第一中标候选人", "").Replace("中标人", "").Replace("中标单位", "").Trim();
                            }
                            Regex regbidMoney = new Regex(@"中标价(:|:)[^\r\n]+\r\n");
                            bidMoney = regbidMoney.Match(bidCtx).Value.Trim();
                            if (string.IsNullOrEmpty(bidMoney))
                            {
                                Regex regbidMoney1 = new Regex(@"中标价[^\r\n]+\r\n");
                                bidMoney = regbidMoney1.Match(bidCtx).Value.Trim();
                            }

                            // Amounts containing "万" are kept as the number in front of it; any other amount is
                            // divided by 10000, and results below 0.1 (or unparsable text) become "0".
                            Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                            if (bidMoney.Contains(","))
                            {
                                bidMoney = bidMoney.Replace(",", "").Trim();
                            }
                            if (bidMoney.Contains("万"))
                            {
                                bidMoney = bidMoney.Remove(bidMoney.IndexOf("万")).Trim();
                                bidMoney = regBidMoney.Match(bidMoney).Value;
                            }
                            else
                            {
                                try
                                {
                                    bidMoney = (decimal.Parse(regBidMoney.Match(bidMoney).Value) / 10000).ToString();
                                    if (decimal.Parse(bidMoney) < decimal.Parse("0.1"))
                                    {
                                        bidMoney = "0";
                                    }
                                }
                                catch (Exception)
                                {
                                    bidMoney = "0";
                                }
                            }
                            Regex regprjMgr = new Regex(@"(项目负责人|项目经理|项目总监)(:|:)[^\r\n]+\r\n");
                            prjMgr = regprjMgr.Match(bidCtx).Value.Replace("项目负责人:", "").Replace("项目经理:", "").Replace("项目总监:", "").Trim();
                            Regex regcode = new Regex(@"项目编号(:|:)[^\r\n]+\r\n");
                            code     = regcode.Match(bidCtx).Value.Replace("项目编号:", "").Replace(":", "").Trim();
                            msgType  = "珠海市建设工程交易中心";
                            specType = "建设工程";
                            // Strip Office namespace declarations that leak into the plain text.
                            bidCtx   = bidCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            bidCtx   = bidCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                            buildUnit = "";
                            prjName = ToolDb.GetPrjName(prjName);
                            bidType = ToolHtml.GetInviteTypes(prjName);
                            BidInfo info = ToolDb.GenBidInfo("广东省", "珠海市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                             bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            // Return everything collected (the previous code returned null here and lost the results).
            return(list);
        }
Пример #10
0
        /// <summary>
        /// Crawls the paged bid-result list at <c>SiteUrl</c>, downloads the detail page linked from every
        /// list row and turns each detail page into a <c>BidInfo</c> record (message type
        /// "湛江市建设工程交易中心").
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first list page cannot be downloaded;
        /// it is never <c>null</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "dnn_ctr476_ArticleList_cboPages")));

            if (nodeList != null && nodeList.Count > 0)
            {
                // The page count is the number in the last "第N页" entry of the pager drop-down.
                // Unexpected pager text leaves page at 1 instead of aborting the crawl.
                string pagerText  = nodeList.AsString().Trim();
                int    lastMarker = pagerText.LastIndexOf("第", StringComparison.Ordinal);
                int    parsedPage;
                if (lastMarker >= 0 && int.TryParse(pagerText.Substring(lastMarker).Replace("第", "").Replace("页", "").Trim(), out parsedPage))
                {
                    page = parsedPage;
                }
            }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    // Subsequent pages are requested by posting the form back with the "next" command
                    // as event target, using the state taken from the previously loaded page.
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "dnn$ctr476$ArticleList$cboPages",
                        "ScrollTop",
                        "__dnnVariable"
                    }, new string[] {
                        "dnn$ctr476$ArticleList$cmdNext",
                        string.Empty,
                        string.Empty,
                        viewState,
                        (i - 2).ToString(),
                        "716",
                        eventValidation
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch (Exception) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dnn_ctr476_ArticleList_PanelA")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = tableNodeList.SearchFor(typeof(TableTag), true)[0] as TableTag;
                    if (table == null)
                    {
                        // The panel holds no table on this page: nothing to read.
                        continue;
                    }
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount < 2)
                        {
                            continue;
                        }
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        ATag aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[0] as ATag;
                        if (aTag == null)
                        {
                            // No detail link in the title cell: nothing to download for this row.
                            continue;
                        }
                        prjName   = tr.Columns[0].ToPlainTextString().Trim();
                        beginDate = tr.Columns[1].ToPlainTextString().Trim();

                        InfoUrl = "http://zb.zjcic.net" + aTag.Link.Replace("amp;", "").Trim();
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            Logger.Error("BidZhanJiangJS");
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "dnn_ctr377_ArticleShow_lblContent")));
                        if (dtnode.Count > 0)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            bidCtx  = dtnode.AsString().Trim().Replace("&#160;", "").Trim();

                            // The winning bidder is taken from the text between a lead-in phrase and the next "中标".
                            // Later lead-ins are tried only while the earlier ones produced nothing usable.
                            if (bidCtx.Contains("推荐"))
                            {
                                bidUnit = ExtractBidUnitAfter(bidCtx, "推荐", false, false);
                            }
                            if (bidCtx.Contains("中标原则") && bidUnit == "")
                            {
                                bidUnit = ExtractBidUnitAfter(bidCtx, "中标原则", true, true);
                            }
                            if (bidCtx.Contains("定标原则") && bidUnit == "")
                            {
                                bidUnit = ExtractBidUnitAfter(bidCtx, "定标原则", true, true);
                            }
                            if (bidCtx.Contains("评标办法") && bidUnit == "以")
                            {
                                bidUnit = ExtractBidUnitAfter(bidCtx, "评标办法", true, true);
                            }
                            if (bidCtx.Contains("中标价:"))
                            {
                                bidMoney = bidCtx.Substring(bidCtx.IndexOf("中标价:")).Replace("中标价:", "").Trim();
                                if (bidMoney.Contains("元"))
                                {
                                    // Substring keeps working when "元" is the very last character.
                                    bidMoney = bidMoney.Substring(0, bidMoney.IndexOf("元") + 1).Trim();
                                }
                            }
                            if (bidCtx.Contains("项目负责人:"))
                            {
                                prjMgr = bidCtx.Substring(bidCtx.IndexOf("项目负责人:")).Replace("项目负责人:", "").Trim();
                                // Keep at most the first four characters; shorter remainders are kept whole.
                                prjMgr = prjMgr.Substring(0, Math.Min(4, prjMgr.Length)).Replace(")", "").Replace("。", "").Replace(",", "").Replace(";", "").Trim();
                            }
                            Regex regBuidUnit = new Regex(@"(招标人|招标单位)(:|:)[^\r\n]+\r\n");
                            buildUnit = regBuidUnit.Match(bidCtx).Value.Replace("招标人:", "").Replace("招标单位:", "").Trim();
                            bidMoney  = ConvertBidMoneyToWan(bidMoney);
                            msgType   = "湛江市建设工程交易中心";
                            specType  = "建设工程";
                            if (bidUnit == "的第一" && bidCtx.Contains("候选人"))
                            {
                                bidUnit = ExtractBidUnitAfter(bidCtx, "候选人", false, true);
                            }
                            if (bidUnit == "了")
                            {
                                // Fall back to the Word-exported result table on the detail page.
                                parserdetail.Reset();
                                NodeList dtnodeF = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoNormalTable")));
                                if (dtnodeF.Count <= 0)
                                {
                                    parserdetail.Reset();
                                    dtnodeF = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoTableGrid")));
                                }
                                if (dtnodeF.Count > 0)
                                {
                                    // Flatten the table into "label:value" lines by pairing adjacent cells.
                                    string   bitext   = string.Empty;
                                    TableTag tableone = (TableTag)dtnodeF[0];
                                    for (int row = 0; row < tableone.RowCount; row++)
                                    {
                                        TableRow r = tableone.Rows[row];

                                        for (int k = 0; k < r.ColumnCount; k += 2)
                                        {
                                            string st  = r.Columns[k].ToPlainTextString().Trim();
                                            string st1 = string.Empty;
                                            if (k + 1 < r.ColumnCount)
                                            {
                                                st1 = r.Columns[k + 1].ToPlainTextString().Trim();
                                            }
                                            bitext += st + ":" + st1 + "\r\n";
                                        }
                                    }
                                    bitext = bitext.Replace("(", "").Replace(")", "").Trim();
                                    Regex regBidUnit = new Regex(@"单位名称(:|:)[^\r\n]+\r\n");
                                    // Strip the label the regex actually matched ("单位名称:"); the older "中标单位:" strip is kept as well.
                                    bidUnit = regBidUnit.Match(bitext).Value.Replace("单位名称:", "").Replace("中标单位:", "").Trim();
                                    Regex regMoney = new Regex(@"(中标价|中标价格)(:|:)[^\r\n]+\r\n");
                                    bidMoney = regMoney.Match(bitext).Value.Replace("中标价:", "").Replace("中标价格:", "").Replace(",", "").Trim();
                                    bidMoney = ConvertBidMoneyToWan(bidMoney);
                                }
                            }
                            bidUnit = bidUnit.Replace("为", "").Replace(": ", "").Trim();
                            // These leftovers mean no real company name was found.
                            if (bidUnit == "了" || bidUnit == "以" || bidUnit == "的第一")
                            {
                                bidUnit = "";
                            }
                            prjName = ToolDb.GetPrjName(prjName);
                            prjName = prjName.Replace("·", "");
                            bidType = ToolHtml.GetInviteTypes(prjName);
                            BidInfo info = ToolDb.GenBidInfo("广东省", "湛江市区", "", string.Empty, code, prjName, buildUnit, beginDate,
                                                             bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                             bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            // Return everything collected (the previous code returned null here and lost the results).
            return(list);
        }

        /// <summary>
        /// Extracts a bidder name from the text that follows <paramref name="marker"/>: every occurrence of
        /// the marker is removed from that tail, the tail is cut at the first "中标", and the result is then
        /// cut right after "公司" and/or "设计院" when those occur.
        /// </summary>
        /// <param name="text">Plain text of the announcement.</param>
        /// <param name="marker">Lead-in phrase after which the bidder name is expected.</param>
        /// <param name="stripCommas">Remove "," from the tail before cutting.</param>
        /// <param name="stripWei">Remove "为" when the name is cut at "设计院".</param>
        /// <returns>
        /// The extracted name, or an empty string when <paramref name="marker"/> or the closing "中标"
        /// is missing (the previous inline code threw in the latter case).
        /// </returns>
        private static string ExtractBidUnitAfter(string text, string marker, bool stripCommas, bool stripWei)
        {
            int start = text.IndexOf(marker, StringComparison.Ordinal);
            if (start < 0)
            {
                return string.Empty;
            }
            string unit = text.Substring(start).Replace(marker, "");
            if (stripCommas)
            {
                unit = unit.Replace(",", "");
            }
            unit = unit.Trim();

            int end = unit.IndexOf("中标", StringComparison.Ordinal);
            if (end < 0)
            {
                return string.Empty;
            }
            unit = unit.Substring(0, end).Trim();

            int companyIdx = unit.IndexOf("公司", StringComparison.Ordinal);
            if (companyIdx >= 0)
            {
                unit = unit.Substring(0, companyIdx + 2).Replace(":", "").Trim();
            }
            // Only cut at "设计院" when it is really present; a bare "院" used to truncate the name to two characters.
            int instituteIdx = unit.IndexOf("设计院", StringComparison.Ordinal);
            if (instituteIdx >= 0)
            {
                unit = unit.Substring(0, instituteIdx + 3);
                if (stripWei)
                {
                    unit = unit.Replace("为", "");
                }
                unit = unit.Replace(":", "").Trim();
            }
            return unit;
        }

        /// <summary>
        /// Normalises a bid amount. Text containing "万" yields the number in front of it unchanged;
        /// any other amount is divided by 10000.
        /// </summary>
        /// <param name="money">Raw amount text taken from the announcement.</param>
        /// <returns>
        /// The normalised amount as text; "0" when no number can be parsed or the divided amount is below 0.1.
        /// May be empty when "万" is present but no number precedes it.
        /// </returns>
        private static string ConvertBidMoneyToWan(string money)
        {
            const string numberPattern = @"[0-9]+[.]{0,1}[0-9]+";
            int wanIdx = money.IndexOf("万", StringComparison.Ordinal);
            if (wanIdx >= 0)
            {
                return Regex.Match(money.Substring(0, wanIdx).Trim(), numberPattern).Value;
            }
            decimal amount;
            if (!decimal.TryParse(Regex.Match(money, numberPattern).Value, out amount))
            {
                return "0";
            }
            decimal wan = amount / 10000;
            return wan < 0.1m ? "0" : wan.ToString();
        }
Пример #11
0
        /// <summary>
        /// Crawls the notice list at <c>SiteUrl</c>, downloads each linked notice and saves it,
        /// together with any attachments linked from the notice page, through <c>ToolDb</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops once <c>MaxCount</c> notices have been handed to
        /// <c>ToolDb.SaveEntity</c>.
        /// </param>
        /// <returns>Always <c>null</c>: notices are saved as they are found instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    sqlCount = 0;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the list page there is nothing to crawl.
                return(null);
            }

            // Only this single list page is processed; no paging is implemented for this site.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list")), true), new TagNameFilter("div")));
            if (nodeList == null || nodeList.Count <= 0)
            {
                return(null);
            }

            for (int j = 0; j < nodeList.Count; j++)
            {
                string href = nodeList[j].GetATagHref();
                if (string.IsNullOrEmpty(href))
                {
                    // No usable link in this entry, so there is no notice page to fetch.
                    continue;
                }

                string headName    = nodeList[j].GetATagValue("title");
                string releaseTime = nodeList[j].ToPlainTextString().GetDateRegex();
                string infoType    = "通知公告";
                string infoScorce  = string.Empty; // the notice source is not extracted from the page
                string infoUrl     = "http://www.szjs.gov.cn/ztfw/gcjs/gzgg/" + href.Replace("../", "").Replace("./", "");
                if (infoUrl.Contains("http://www.sz.gov.cn/"))
                {
                    // The link itself already points at www.sz.gov.cn, so use it as-is.
                    infoUrl = href;
                }

                string htldtl;
                try
                {
                    htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                }
                catch
                {
                    // The notice page could not be downloaded; move on to the next entry.
                    continue;
                }

                // The notice body lives in one of three containers; try them in order.
                Parser   detailParser = new Parser(new Lexer(htldtl));
                NodeList noList       = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "content")));
                if (noList == null || noList.Count <= 0)
                {
                    detailParser.Reset();
                    noList = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "Custom_UnionStyle")));
                }
                if (noList == null || noList.Count <= 0)
                {
                    detailParser.Reset();
                    noList = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "contentWrap")));
                }
                if (noList == null || noList.Count <= 0)
                {
                    continue;
                }

                string ctxHtml = noList.AsHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");
                string infoCtx = noList.AsString().Replace(" ", "").Replace("&nbsp;", "").Replace("\t\t", "\t").Replace("\t\t", "\t");
                // Strip any remaining markup, then squeeze whitespace and blank lines.
                infoCtx = Regex.Replace(infoCtx, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace(" ", "").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                string msgType = MsgTypeCosnt.ShenZhenZJJMsgType;

                NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "深圳市工程", string.Empty, infoCtx, infoType);
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return(null);
                }
                sqlCount++;

                if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                {
                    // Attachments are only collected for notices that SaveEntity reports as saved.
                    continue;
                }

                // Attachment links are looked up in the "same" block first, then in "contentWrap".
                Parser   attachParser = new Parser(new Lexer(htldtl));
                NodeList aList        = attachParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "same")), true), new TagNameFilter("a")));
                if (aList == null || aList.Count <= 0)
                {
                    attachParser.Reset();
                    aList = attachParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "contentWrap")), true), new TagNameFilter("a")));
                }
                if (aList == null || aList.Count <= 0)
                {
                    continue;
                }

                for (int k = 0; k < aList.Count; k++)
                {
                    ATag a = aList[k].GetATag();
                    if (!a.IsAtagAttach())
                    {
                        continue;
                    }
                    try
                    {
                        // Attachment hrefs are relative to the notice's folder; which site that
                        // folder is on depends on whether the list link was absolute.
                        string link;
                        if (href.Contains("http"))
                        {
                            string folder = href.GetRegexBegEnd("tzgg/", "/");
                            link = "http://www.sz.gov.cn/jsj/qt/tzgg/" + folder + "/" + a.Link.Replace("./", "");
                        }
                        else
                        {
                            string folder = infoUrl.GetRegexBegEnd("gzgg/", "/");
                            link = "http://www.szjs.gov.cn/ztfw/gcjs/gzgg/" + folder + "/" + a.Link.Replace("./", "");
                        }
                        BaseAttach obj = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id);
                        if (obj != null)
                        {
                            ToolDb.SaveEntity(obj, string.Empty);
                        }
                    }
                    catch
                    {
                        // Best effort: a failing attachment must not stop the remaining ones.
                    }
                }
            }
            return(null);
        }
Пример #12
0
        /// <summary>
        /// Crawls the first three pages of the bid-result list at <c>SiteUrl</c>, opens every listed
        /// project and turns its detail page into a <c>BidInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">When <c>false</c>, stops as soon as <c>MaxCount</c> records are collected.</param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; the list is empty when the first list page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 3; // number of list pages walked; fixed, not read from the site

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8, ref cookiestr);
            }
            catch (Exception)
            {
                return(list);
            }

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&page.number=" + i.ToString()), Encoding.UTF8);
                    }
                    catch (Exception)
                    {
                        // This page could not be fetched; try the next one.
                        continue;
                    }
                }

                Parser   parser        = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_title3")));
                if (tableNodeList == null || tableNodeList.Count <= 0)
                {
                    continue;
                }

                TableTag table = (TableTag)tableNodeList[0];
                // Row 0 is skipped (presumably the header row - confirm against the page).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 9)
                    {
                        // Columns 4, 6 and 8 are read below; a shorter row cannot be a project row.
                        continue;
                    }
                    NodeList anchors = tr.Columns[4].SearchFor(typeof(ATag), true);
                    if (anchors == null || anchors.Count <= 0)
                    {
                        continue;
                    }
                    ATag aTag = anchors[0] as ATag;
                    if (aTag == null)
                    {
                        continue;
                    }

                    string prjName   = tr.Columns[4].ToPlainTextString().Trim();
                    string beginDate = tr.Columns[6].ToPlainTextString().Trim();
                    string endDate   = tr.Columns[8].ToPlainTextString().Trim();
                    string infoUrl   = "http://bidding.cnpec.com.cn/member/" + aTag.Link.Replace("amp;", "").Trim();

                    string htmldetail;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(infoUrl), Encoding.UTF8).Replace("&nbsp;", "");
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    string code      = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                           bidType   = string.Empty, bidCtx    = string.Empty, htmlTxt = string.Empty,
                           otherType = string.Empty, prjMgr    = string.Empty;
                    // Fixed for every record produced by this crawler.
                    string msgType  = "中广核工程有限公司";
                    string specType = "建设工程";
                    string remark   = "国际中标";

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                    if (dtnode == null || dtnode.Count <= 0)
                    {
                        // No detail table: fall back to the free-text cell.
                        Parser   parserdetailDiv = new Parser(new Lexer(htmldetail));
                        NodeList dtnodelDiv      = parserdetailDiv.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "font01")));
                        htmlTxt = dtnodelDiv.AsHtml();
                        bidCtx  = dtnodelDiv.AsString().Trim();
                        // NOTE(review): the alternation lists the same ASCII colon twice; presumably one
                        // side was meant to be the full-width colon - confirm against the original file.
                        bidUnit   = Regex.Match(bidCtx, @"中标单位(:|:)[^\r\n]+\r\n").Value.Replace("中标单位:", "").Trim();
                        buildUnit = "中广核工程有限公司";
                        bidMoney  = "0";
                        // NOTE(review): unlike the table branch, prjName is not passed through
                        // ToolDb.GetPrjName here - confirm whether that is intended.
                        bidType = ToolHtml.GetInviteTypes(prjName);
                    }
                    else
                    {
                        TableTag tableNode = (TableTag)dtnode[0];
                        htmlTxt = dtnode.AsHtml();

                        // Flatten the table into one cell text per line so the label regexes below can run on it.
                        StringBuilder ctxBuilder = new StringBuilder();
                        for (int k = 1; k < tableNode.RowCount; k++)
                        {
                            TableRow trow = tableNode.Rows[k];
                            for (int c = 0; c < trow.ColumnCount; c++)
                            {
                                string cellText = trow.Columns[c].ToPlainTextString().Trim();
                                if (cellText.Contains("中标候选人") && k + 1 < tableNode.RowCount && tableNode.Rows[k + 1].ColumnCount > 0)
                                {
                                    // The winner is read from the first cell of the row below the label.
                                    bidUnit = tableNode.Rows[k + 1].Columns[0].ToPlainTextString().Trim();
                                }
                                ctxBuilder.Append("\r\n").Append(cellText);
                            }
                        }
                        ctxBuilder.Append("\r\n");
                        bidCtx = ctxBuilder.ToString();

                        code      = Regex.Match(bidCtx, @"招标编号(:|:)[^\r\n]+\r\n").Value.Replace("招标编号:", "").Trim();
                        buildUnit = Regex.Match(bidCtx, @"(招标人|建设单位)(:|:)[^\r\n]+\r\n").Value.Replace("招标人:", "").Replace("建设单位:", "").Trim();
                        bidMoney  = Regex.Match(bidCtx, @"中标价(:|:)[^\r\n]+\r\n").Value.Replace("中标价:", "").Replace(",", "").Replace("RMB", "").Trim();
                        if (bidMoney.Contains("EUR"))
                        {
                            // Amounts quoted in EUR are not converted; they are recorded as 0.
                            bidMoney = "0";
                        }

                        // NOTE(review): this pattern needs at least two digits, so a single-digit amount
                        // is left as the raw text - confirm whether that is intended.
                        string amount = Regex.Match(bidMoney, @"[0-9]+[.]{0,1}[0-9]+").Value;
                        if (!string.IsNullOrEmpty(amount))
                        {
                            if (bidMoney.Contains("万"))
                            {
                                // Already expressed in units of 10,000: keep the number as written.
                                bidMoney = amount;
                            }
                            else
                            {
                                // Convert to units of 10,000; anything below 0.1 is recorded as 0.
                                decimal parsed;
                                if (decimal.TryParse(amount, out parsed))
                                {
                                    decimal inTenThousands = parsed / 10000;
                                    bidMoney = inTenThousands < 0.1m ? "0" : inTenThousands.ToString();
                                }
                                else
                                {
                                    bidMoney = "0";
                                }
                            }
                        }

                        if (Encoding.Default.GetByteCount(bidUnit) > 50)
                        {
                            // Over-long winner text: keep only its first line.
                            bidUnit = Regex.Match(bidUnit, @"[^\r\n]+\r\n").Value.Trim();
                        }
                        infoUrl = infoUrl.Replace("filter_EQ_isinternational=0", "filter_EQ_isinternational=1");
                        prjName = ToolDb.GetPrjName(prjName);
                        // NOTE(review): bidType is left empty in this branch while the other branch
                        // derives it from prjName - confirm whether that is intended.
                    }

                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate,
                                                     bidUnit, beginDate, endDate, bidCtx, remark, msgType, bidType, specType, otherType,
                                                     bidMoney, infoUrl, prjMgr, htmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #13
0
        /// <summary>
        /// Crawls every page of the bid-result list at <c>SiteUrl</c>, opens each listed project and
        /// builds a <c>BidInfo</c> from its detail page.
        /// </summary>
        /// <param name="crawlAll">When <c>false</c>, stops as soon as <c>MaxCount</c> records are collected.</param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; the list is empty when the first list page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<BidInfo>();
            int    pageInt = 1;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
            }
            catch (Exception)
            {
                return(list);
            }

            // Get the page count from the pager text; stay on a single page when it cannot be read.
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "digg")));
            if (tdNodes != null && tdNodes.Count > 0)
            {
                string pageTemp = tdNodes.AsString().Replace("&nbsp;", "").Trim();
                string pageText = Regex.Match(pageTemp, @"共\d+页").Value.Replace("共", "").Replace("页", "").Trim();
                int    parsedPages;
                if (int.TryParse(pageText, out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&page=" + i.ToString()), Encoding.UTF8);
                    }
                    catch (Exception)
                    {
                        // This page could not be fetched; try the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("width", "100%"), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count <= 4)
                {
                    // The result list is the fifth full-width table; without it there is nothing to read.
                    continue;
                }

                TableTag table = (TableTag)nodeList[4];
                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow row = table.Rows[j];
                    if (row.ColumnCount < 3)
                    {
                        // Columns 0-2 are read below; a shorter row cannot be a project row.
                        continue;
                    }
                    NodeList anchors = row.SearchFor(typeof(ATag), true);
                    if (anchors == null || anchors.Count <= 0)
                    {
                        continue;
                    }
                    ATag aTag = anchors[0] as ATag;
                    if (aTag == null)
                    {
                        continue;
                    }

                    string code      = row.Columns[0].ToPlainTextString().Trim();
                    string prjName   = row.Columns[1].ToPlainTextString().Trim();
                    string beginDate = row.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl   = "http://www.szldzb.com/" + aTag.Link;

                    string buildUnit = string.Empty, endDate = string.Empty, otherType = string.Empty, prjMgr = string.Empty;
                    string htmlTxt   = string.Empty;
                    NodeList dtnode  = null;
                    try
                    {
                        string htmldetail = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString().Replace("&nbsp;", "").Trim();
                        // Parse the detail page once; both the HTML and the plain text come from this node list.
                        Parser dtlparser = new Parser(new Lexer(htmldetail));
                        dtnode  = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("width", "620"), new TagNameFilter("table")));
                        htmlTxt = dtnode.AsHtml();
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    string bidCtx = dtnode.AsString().Trim().ToLower().Replace(" ", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "").Replace("<?xml:namespaceprefix=st1/>", "").Replace("startfragment", "").Replace("endfragment", "");

                    string bidUnit = bidCtx.GetBidRegex();
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.GetBidRegex(new string[] { "成交人" });
                    }
                    string bidMoney = bidCtx.GetMoneyRegex(null, false, "万元整,万元");

                    if (string.IsNullOrEmpty(bidUnit) && (bidMoney == "0" || string.IsNullOrEmpty(bidMoney)))
                    {
                        // Nothing found in the running text: try the first Word-style table, pairing
                        // each header cell of row 0 with the cell below it as "header:value" lines.
                        Parser   par     = new Parser(new Lexer(htmlTxt));
                        NodeList listCon = par.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoNormalTable")));
                        if (listCon != null && listCon.Count > 0)
                        {
                            TableTag tab   = listCon[0] as TableTag;
                            string   pairs = string.Empty;
                            if (tab != null && tab.RowCount > 1)
                            {
                                TableRow headerRow = tab.Rows[0];
                                TableRow valueRow  = tab.Rows[1];
                                int      pairCount = Math.Min(headerRow.ColumnCount, valueRow.ColumnCount);
                                for (int d = 0; d < pairCount; d++)
                                {
                                    pairs += headerRow.Columns[d].ToPlainTextString().Trim() + ":" + valueRow.Columns[d].ToPlainTextString().Trim() + "\r\n";
                                }
                            }
                            bidUnit  = pairs.GetBidRegex();
                            bidMoney = pairs.GetMoneyRegex();
                        }
                    }

                    if (!string.IsNullOrEmpty(bidMoney) && bidMoney != "0")
                    {
                        decimal money;
                        if (!decimal.TryParse(bidMoney, out money))
                        {
                            // Not a number: record 0 rather than letting one bad value abort the crawl.
                            bidMoney = "0";
                        }
                        else if (money > 100000)
                        {
                            // Amounts above 100000 are divided by 10000 (presumably a value given in
                            // single units rather than units of 10,000 - confirm).
                            bidMoney = (money / 10000).ToString();
                        }
                    }

                    string specType = "其他";
                    string msgType  = "深圳龙达招标有限公司";
                    prjName = ToolDb.GetPrjName(prjName);
                    string bidType = ToolHtml.GetInviteTypes(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
                    list.Add(info);

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #14
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <BidInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    string __CSRFTOKEN      = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__CSRFTOKEN",
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    },
                                                                                      new string[] {
                        __CSRFTOKEN,
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    });
                    try
                    {
                        cookiestr = cookiestr.GetReplace(new string[] { "path=/;", "HttpOnly", "," });
                        html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string   prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, bidDate = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, remark = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        prjName   = aTag.GetAttribute("title");
                        beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.spprec.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        }
                        catch
                        {
                            continue;
                        }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode == null || dtlNode.Count < 1)
                        {
                            dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ivs_content")));
                        }
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.GetReplace("<br />,<br/>,<br>,</p>", "\r\n").ToCtxString();
                            parser  = new Parser(new Lexer(HtmlTxt));
                            NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                bidCtx = string.Empty;
                                TableTag htmlTable = tableNode[0] as TableTag;
                                for (int r = 0; r < htmlTable.RowCount; r++)
                                {
                                    for (int c = 0; c < htmlTable.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = htmlTable.Rows[r].Columns[c].ToNodePlainString();

                                        if (string.IsNullOrEmpty(temp))
                                        {
                                            continue;
                                        }
                                        if ((c + 1) % 2 == 0)
                                        {
                                            bidCtx += temp + "\r\n";
                                        }
                                        else
                                        {
                                            bidCtx += temp + ":";
                                        }
                                    }
                                }
                            }

                            buildUnit  = bidCtx.GetBuildRegex().GetReplace("/");
                            prjAddress = bidCtx.GetAddressRegex();
                            code       = bidCtx.GetCodeRegex().GetChina().GetCodeDel();
                            bidUnit    = bidCtx.GetRegexBegEnd("成交供应商及报价:", ",");
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = bidCtx.GetBidRegex().GetReplace("/");
                            }
                            if (bidUnit.Contains("公司"))
                            {
                                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                            }
                            if (!bidCtx.Contains("废标"))
                            {
                                bidMoney = bidCtx.GetMoneyRegex();
                            }
                            bidUnit  = bidUnit.GetReplace("第一包,1,、,:");
                            msgType  = "四川省公共资源交易中心";
                            specType = bidType = "政府采购";

                            BidInfo info = ToolDb.GenBidInfo("四川省", "四川省及地市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag tag = aNode[k] as ATag;
                                    if (tag.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (tag.Link.ToLower().Contains("http"))
                                        {
                                            link = tag.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.spprec.com" + tag.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                        else
                        {
                            Logger.Error("无内容" + InfoUrl);
                        }
                    }
                }
            }
            return(list);
        }
Пример #15
0
        /// <summary>
        /// Walks every page of the listing at <c>SiteUrl</c> and turns each data row of the
        /// table with id <c>tblPrjConstBid</c> into a <c>ProjectFinish</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  records        = new List<ProjectFinish>();
            string pageHtml       = string.Empty;
            int    totalPages     = 1;
            int    rowsSincePause = 1;

            try
            {
                pageHtml = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return records;
            }

            Parser   parser     = new Parser(new Lexer(pageHtml));
            NodeList pagerCells = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "pageLinkTd")));

            if (pagerCells != null && pagerCells.Count > 0)
            {
                try
                {
                    // The pager text carries the page total between "总页数" and "页".
                    string pagerText  = pagerCells.AsString().ToNodeString();
                    string pageDigits = pagerText.GetRegexBegEnd("总页数", "页").Replace(":", "");
                    totalPages = int.Parse(pageDigits);
                }
                catch (Exception)
                {
                    // Pager not in the expected form: keep the single-page default.
                }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                if (pageNo > 1)
                {
                    // Page 1 is already loaded; later pages are requested with a form collection.
                    string[] fieldNames  = new string[] { "page", "qymc", "ann_serial", "pro_name" };
                    string[] fieldValues = new string[] { pageNo.ToString(), "", "", "" };
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                    try
                    {
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.UTF8);
                    }
                    catch (Exception)
                    {
                        // A page that fails to download is skipped.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(pageHtml));
                NodeList tables = parser.ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblPrjConstBid")));
                if (tables == null || tables.Count <= 0)
                {
                    continue;
                }

                TableTag grid = (TableTag)tables[0];
                // Row 0 and the last row are not read (presumably header and pager rows).
                for (int rowIndex = 1; rowIndex < grid.RowCount - 1; rowIndex++)
                {
                    TableRow row = grid.Rows[rowIndex];

                    string projectCode = row.Columns[0].ToNodePlainString();
                    string projectName = row.Columns[1].ToNodePlainString();
                    string buildUnit   = row.Columns[2].ToNodePlainString();
                    string endDate     = row.Columns[3].ToNodePlainString().GetDateRegex();

                    // The listing supplies no record unit, so the fixed authority name is always used.
                    string recordUnit = "深圳市住房和建设局";

                    // Fields the listing does not provide are passed as empty strings.
                    ProjectFinish record = ToolDb.GenProjectFinish(
                        "广东省", string.Empty, "深圳市区", string.Empty, endDate,
                        string.Empty, string.Empty, string.Empty, string.Empty, string.Empty,
                        buildUnit, projectCode, projectName, recordUnit, string.Empty,
                        "深圳市住房和建设局", string.Empty);
                    records.Add(record);

                    if (!crawlAll && records.Count >= this.MaxCount)
                    {
                        return records;
                    }

                    // The counter starts at 1, so the 600-second pause is taken after every 199 rows.
                    rowsSincePause++;
                    if (rowsSincePause >= 200)
                    {
                        rowsSincePause = 1;
                        Thread.Sleep(600 * 1000);
                    }
                }
            }

            return records;
        }
Пример #16
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, downloads the detail page of every
        /// listed notice and builds one <c>InviteInfo</c> for each notice whose detail page
        /// contains <c>MsoNormal</c> paragraphs.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records (never null); an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            // The patterns never change, so they are built once instead of once per row.
            Regex regexScript  = new Regex(@"<script[^<]*</script>");
            Regex regexPage    = new Regex(@"共\d+页");
            Regex regPrjAddr   = new Regex(@"(工程地点|建设地点):[^\r\n]+\r\n");
            Regex regBuildUnit = new Regex(@"(招标人|招标人(盖章)|招标人):[^\r\n]+[\r\n]{1}");
            Regex regcode      = new Regex(@"(招标项目编号|项目编号)(:|:)[^\r\n]+[\r\n]{1}");
            Regex regoType     = new Regex(@"工程类型:[^\r\n]+\r\n");
            Regex regexCtx     = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
                htl = regexScript.Replace(htl, "");
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "right")));

            try
            {
                page = int.Parse(regexPage.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' }));
            }
            catch (Exception)
            {
                // Best effort: if the "共N页" marker is missing or malformed, crawl only the first page.
                page = 1;
            }

            // Inclusive upper bound: the last page (and the only page when page == 1) must be crawled too.
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&otype=&pageNum=" + i.ToString()), Encoding.Default);
                        htl = regexScript.Replace(htl, "");
                    }
                    catch (Exception)
                    {
                        // A page that fails to download is skipped.
                        continue;
                    }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "1")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = (TableTag)tableNodeList[0];
                for (int j = 0; j < table.RowCount; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, bidType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                    TableRow tr = table.Rows[j];

                    // Rows that lack the title/date cells or a clickable title cannot be turned
                    // into a record; skip them instead of letting an exception abort the crawl.
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }
                    NodeList linkNodes = tr.Columns[0].SearchFor(typeof(ATag), true);
                    ATag     aTag      = (linkNodes != null && linkNodes.Count > 0) ? linkNodes[0] as ATag : null;
                    string   onclick   = aTag == null ? null : aTag.GetAttribute("onclick");
                    if (string.IsNullOrEmpty(onclick))
                    {
                        continue;
                    }

                    prjName   = tr.Columns[0].ToPlainTextString().Trim();
                    beginDate = tr.Columns[1].ToPlainTextString().Replace("&nbsp; ", "").Trim();
                    if (beginDate.Length > 10)
                    {
                        // Only the leading 10 characters of the cell are kept as the date.
                        beginDate = beginDate.Substring(0, 10);
                    }

                    // The detail path is what follows the "showDeptContent('1925','" prefix,
                    // up to the next single quote.
                    InfoUrl = "http://market.meizhou.gov.cn/website/deptwebsite/1925/Content.jsp?issueId=15488&msgType=00&filePath=" + onclick.Replace("showDeptContent('1925','", "");
                    int    ii = InfoUrl.IndexOf("'");
                    string oo = (ii >= 0 ? InfoUrl.Remove(ii) : InfoUrl).Trim();

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(oo), Encoding.Default).Replace("&nbsp;", "");
                        htmldetail = regexScript.Replace(htmldetail, "");
                    }
                    catch (Exception)
                    {
                        Logger.Error("InviteMeiZhouCityJS");
                        continue;
                    }
                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("P"), new HasAttributeFilter("class", "MsoNormal")));
                    // Null must be tested before Count is read.
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtnode.AsHtml();
                    for (int k = 0; k < dtnode.Count; k++)
                    {
                        string tr1 = dtnode[k].ToPlainTextString().Replace(" ", "").Trim();
                        if (k == 0)
                        {
                            // The first paragraph is the text the notice type is derived from.
                            bidType = ToolHtml.GetInviteTypes(tr1);
                        }
                        // Every paragraph becomes one "text:\r\n" line; the regexes below rely on that shape.
                        inviteCtx += tr1 + ":" + "\r\n";
                    }

                    prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("建设地点", "").Replace(":", "").Replace(";", "").Trim();

                    buildUnit = regBuildUnit.Match(inviteCtx).Value.Replace("招  标人:", "").Replace("招标人(盖章):", "").Replace("招标人:", "").Trim();
                    if (buildUnit != "")
                    {
                        // Cut at the first remaining colon; Remove(-1) would throw, so only cut when one exists.
                        int zz = buildUnit.IndexOf(":");
                        if (zz >= 0)
                        {
                            buildUnit = buildUnit.Remove(zz);
                        }
                    }

                    code = regcode.Match(inviteCtx).Value.Replace("招标项目编号", "").Replace("项目编号", "").Replace(":", "").Replace(":", "").Trim();

                    string oType = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                    if (oType.Contains("房建"))
                    {
                        otherType = "房建及工业民用建筑";
                    }
                    else if (oType.Contains("市政"))
                    {
                        otherType = "市政工程";
                    }
                    else if (oType.Contains("园林绿化"))
                    {
                        otherType = "园林绿化工程";
                    }
                    else if (oType.Contains("装饰") || oType.Contains("装修"))
                    {
                        otherType = "装饰装修工程";
                    }
                    else if (oType.Contains("电力"))
                    {
                        otherType = "电力工程";
                    }
                    else if (oType.Contains("水利"))
                    {
                        otherType = "水利工程";
                    }
                    // Tested independently of the chain above, so it overrides any earlier match.
                    if (oType.Contains("环保"))
                    {
                        otherType = "环保工程";
                    }

                    // A project code longer than 50 bytes in the default encoding is dropped.
                    if (Encoding.Default.GetByteCount(code) > 50)
                    {
                        code = string.Empty;
                    }
                    msgType   = "梅州市建设工程交易中心";
                    specType  = "建设工程";
                    inviteCtx = regexCtx.Replace(inviteCtx, "");
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "梅州市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, bidType, specType, otherType, oo, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            // Return what was collected; returning null here discarded every full crawl.
            return(list);
        }
Пример #17
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, follows each list item to its detail
        /// page and builds one <c>InviteInfo</c> for every detail page that contains a
        /// <c>DivContent</c> element.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "bmdt_fy")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // Strip the pager text down to the bare page count
                    // (presumably a createPageHTML(...) script call — confirm against the site).
                    string temp = sNode.AsString().Replace("createPageHTML(", "").Replace("index", "").Replace("html", "").Replace(", 0,", "").Replace(");", "").Replace(",", "").Replace(";", "").Replace(")", "").Replace("\"", "").Replace(" ", "");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page n (n > 1) is requested as index_{n-1}.html.
                        html = this.ToolWebSite.GetHtmlByUrl("http://syjdb.baoan.gov.cn/xxgk_12101/ywxx/zbcg/zbxxgs/index_" + (i - 1).ToString() + ".html", Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "new_list01"))), new TagNameFilter("ul"))), new TagNameFilter("li")));
                if (viewList != null && viewList.Count > 0)
                {
                    for (int j = 0; j < viewList.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        Regex  regDate  = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
                        string itemText = viewList[j].ToPlainTextString().Trim();
                        beginDate = regDate.Match(itemText).Value;
                        // string.Replace throws ArgumentException for an empty search string, so an
                        // item without a date must not reach Replace; its text is used unchanged.
                        string temp = string.IsNullOrEmpty(beginDate) ? itemText : itemText.Replace(beginDate, "");
                        try
                        {
                            // Narrow the item text to its last "<a ...>...</a>" fragment after the
                            // first "else", then read the title and link from that fragment.
                            // Any failure (marker missing, no link) skips the item.
                            int beg = temp.IndexOf("else"), end = temp.Length;
                            temp    = temp.Substring(beg, end - beg);
                            beg     = temp.LastIndexOf("<a");
                            end     = temp.LastIndexOf("/a>");
                            temp    = temp.Substring(beg, (end - beg) + 3);
                            beg     = temp.IndexOf(">");
                            end     = temp.IndexOf("</");
                            prjName = temp.Substring(beg + 1, end - beg - 1);
                            Parser   p    = new Parser(new Lexer(temp));
                            NodeList l    = p.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            ATag     aTag = l.SearchFor(typeof(ATag), true)[0] as ATag;
                            InfoUrl = "http://syjdb.baoan.gov.cn/xxgk_12101/ywxx/zbcg/zbxxgs/" + aTag.Link.Replace("../", "").Replace("./", "");
                        }
                        catch { continue; }
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");
                            htlDtl = regexHtml.Replace(htlDtl, "");
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "DivContent")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt   = dtl.AsHtml();
                            inviteCtx = dtl.AsString().Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                            // Title keyword -> invite type; later entries win over earlier ones.
                            string[][] typeByKeyword = new string[][]
                            {
                                new string[] { "施工", "施工" },
                                new string[] { "监理", "监理" },
                                new string[] { "设计", "设计" },
                                new string[] { "勘察", "勘察" },
                                new string[] { "服务", "服务" },
                                new string[] { "劳务分包", "劳务分包" },
                                new string[] { "专业分包", "专业分包" },
                                new string[] { "小型施工", "小型工程" },
                                new string[] { "设备材料", "设备材料" }
                            };
                            foreach (string[] pair in typeByKeyword)
                            {
                                if (prjName.Contains(pair[0]))
                                {
                                    inviteType = pair[1];
                                }
                            }

                            Regex regPrjAddr = new Regex(@"(工程位置|工程地点|工程地址|详细地址|地点|地址)(:|:)[^\r\n]+\r\n");
                            prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程位置", "").Replace("工程地点", "").Replace("工程地址", "").Replace("详细地址", "").Replace("地点", "").Replace("地址", "").Replace(":", "").Replace(":", "").Trim();
                            Regex regBuildUnit = new Regex(@"(招标代理机构|招标单位|招标人|招标单位(盖章))(:|:)[^\r\n]+\r\n");
                            buildUnit = regBuildUnit.Match(inviteCtx).Value.Replace("招标代理机构", "").Replace("招标单位", "").Replace("招标人", "").Replace("(盖章)", "").Replace(":", "").Replace(":", "").Trim();
                            Regex regPrjCode = new Regex(@"(工程编号|项目编号|编号)(:|:)[^\r\n]+\r\n");
                            code    = regPrjCode.Match(inviteCtx).Value.Replace("工程编号", "").Replace("项目编号", "").Replace("编号", "").Replace(":", "").Replace(":", "").Trim();
                            msgType = "深圳市宝安区石岩街道办事处";
                            // An address that is missing or longer than 150 bytes gets a placeholder.
                            if (string.IsNullOrEmpty(prjAddress) || Encoding.Default.GetByteCount(prjAddress) > 150)
                            {
                                prjAddress = "见招标信息";
                            }
                            code      = ToolHtml.GetSubString(code, 50);
                            buildUnit = ToolHtml.GetSubString(buildUnit, 150);
                            specType  = "建设工程";
                            // NOTE(review): this unconditional assignment discards the title-derived
                            // type computed above, so every record is classified as "小型工程".
                            // Kept as in the original; confirm whether the override is intended.
                            inviteType = "小型工程";
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = "深圳市宝安区石岩街道办事处";
                            }
                            inviteType = ToolHtml.GetInviteType(inviteType);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Пример #18
0
        /// <summary>
        /// Crawls the paged table with id <c>table1</c> starting at <c>SiteUrl</c> and builds one
        /// <c>NoticeInfo</c> of type "业绩公示" per data row. A row is recorded only when its
        /// detail page can be downloaded; the record text itself is assembled from the row's cells.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  notices    = new ArrayList();
            string listHtml   = string.Empty;
            int    totalPages = 1;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return notices;
            }

            Parser   parser     = new Parser(new Lexer(listHtml));
            NodeList pagerLinks = parser.ExtractAllNodesThatMatch(
                new AndFilter(
                    new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tzgg_right_page")), true),
                    new TagNameFilter("a")));

            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                try
                {
                    // The page total is read from the second-to-last pager link.
                    ATag   pagerLink = pagerLinks[pagerLinks.Count - 2] as ATag;
                    string pageText  = pagerLink.LinkText;
                    totalPages = Convert.ToInt32(pageText.Replace("goPage(", "").Replace(")", ""));
                }
                catch
                {
                    totalPages = 1;
                }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                if (pageNo > 1)
                {
                    try
                    {
                        // NOTE(review): the offset is the page number followed by "0" (page 2 -> 20);
                        // confirm this matches the site's paging, it may skip or overlap rows.
                        string pageUrl = "http://www.gzzb.gd.cn/cms/wz/view/tzygg/enterpriseAchievementServlet?name=&number=&projectName=&projectNumber=&siteId=1&channelId=19&pager.offset=" + pageNo.ToString() + "0";
                        listHtml = this.ToolWebSite.GetHtmlByUrl(pageUrl, Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(listHtml));
                NodeList tables = parser.ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "table1")));
                if (tables == null || tables.Count <= 0)
                {
                    continue;
                }

                TableTag grid = tables[0] as TableTag;
                // Row 0 is not read (presumably the header row).
                for (int rowIndex = 1; rowIndex < grid.RowCount; rowIndex++)
                {
                    TableRow row = grid.Rows[rowIndex];

                    string projectCode = row.Columns[1].ToNodePlainString();
                    string title       = row.Columns[2].ToNodePlainString();
                    string unitName    = row.Columns[4].ToNodePlainString();
                    string auditTime   = row.Columns[5].ToPlainTextString();
                    string noticeType  = "业绩公示";
                    string detailUrl   = "http://www.gzzb.gd.cn" + row.Columns[2].GetATagHref();

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = ToolHtml.GetHtmlByUrl(detailUrl, Encoding.Default);
                        detailHtml = detailHtml.GetJsString();
                    }
                    catch
                    {
                        // Rows whose detail page cannot be fetched are not recorded.
                        continue;
                    }

                    // The detail body is located but not used; content and attachment
                    // extraction from it is disabled in the original code.
                    parser = new Parser(new Lexer(detailHtml));
                    NodeList detailBody = parser.ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "block-body")));

                    string unitCode = row.Columns[3].ToNodePlainString();
                    string content  = "项目编号:" + projectCode
                                    + "\r\n项目名称:" + title
                                    + "\r\n单位编号:" + unitCode
                                    + "\r\n单位名称:" + unitName
                                    + "\r\n审核时间:" + auditTime;
                    // The same text is stored as both the plain content and the HTML text.
                    string contentHtml = content;

                    NoticeInfo notice = ToolDb.GenNoticeInfo(
                        "广东省", "广州市区", string.Empty, string.Empty, title, noticeType, content, auditTime,
                        string.Empty, MsgTypeCosnt.GuangZhouMsgType, detailUrl, projectCode, unitName,
                        string.Empty, string.Empty, string.Empty, string.Empty, contentHtml);
                    notices.Add(notice);

                    if (!crawlAll && notices.Count >= this.MaxCount)
                    {
                        return notices;
                    }
                }
            }

            return notices;
        }
Пример #19
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser    = new Parser(new Lexer(htl));
            NodeList nodeList  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("align", "right")));
            Regex    regexPage = new Regex(@"\d+页");

            try
            {
                page = Convert.ToInt32(regexPage.Match(nodeList.AsString()).Value.Replace("页", "").Trim());
            }
            catch (Exception)
            { }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&page=" + i.ToString()), Encoding.Default);
                    }
                    catch (Exception ex) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                if (tableNodeList != null && tableNodeList.Count > 1)
                {
                    TableTag table = (TableTag)tableNodeList[3];
                    for (int j = 0; j < table.RowCount - 1; j++)
                    {
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               bidDate = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjAddress = string.Empty, remark = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[1] as ATag;
                        prjName   = aTag.LinkText;
                        beginDate = tr.Columns[1].ToPlainTextString().Trim();
                        InfoUrl   = "http://zhaobiao.szpt.edu.cn/" + aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default);
                        }
                        catch (Exception)
                        {
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new TagNameFilter("p"));
                        if (dtnode.Count > 0)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            Regex regeximg = new Regex(@"<IMG[^>]*>");//去掉图片
                            HtmlTxt = regeximg.Replace(HtmlTxt, "");
                            for (int z = 0; z < dtnode.Count; z++)
                            {
                                bidCtx += dtnode[z].ToPlainTextString().Replace("&nbsp;", "").Trim() + "\r\n";
                            }
                            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                            bidCtx = regexHtml.Replace(bidCtx, "");
                            Regex regcode = new Regex(@"(项目编号|招标编号)(:|:)[^\r\n]+\r\n");
                            code = regcode.Match(bidCtx).Value.Replace("项目编号:", "").Replace("招标编号:", "").Replace(":", "").Trim();
                            Regex regBidUnit = new Regex(@"(成交单位|中标单位)(:|:)[^\r\n]+\r\n");
                            bidUnit = regBidUnit.Match(bidCtx).Value.Replace("成交单位:", "").Replace("中标单位:", "").Replace("中标折扣率:72.5%", "").Trim();
                            Regex regMoney = new Regex(@"(中标价|中标价格)(:|:)[^\r\n]+\r\n");
                            bidMoney = regMoney.Match(bidCtx).Value.Replace("中标价:", "").Replace("中标价格:", "").Replace(",", "").Trim();
                            Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                            if (bidMoney.Contains("万"))
                            {
                                bidMoney = bidMoney.Remove(bidMoney.IndexOf("万")).Trim();
                                bidMoney = regBidMoney.Match(bidMoney).Value;
                            }
                            else
                            {
                                try
                                {
                                    bidMoney = (decimal.Parse(regBidMoney.Match(bidMoney).Value) / 10000).ToString();
                                    if (decimal.Parse(bidMoney) < decimal.Parse("0.1"))
                                    {
                                        bidMoney = "0";
                                    }
                                }
                                catch (Exception)
                                {
                                    bidMoney = "0";
                                }
                            }
                            if (bidType == "设备材料" || bidType == "小型施工" || bidType == "专业分包" || bidType == "劳务分包" || bidType == "服务" || bidType == "勘察" || bidType == "设计" || bidType == "监理" || bidType == "施工")
                            {
                                specType = "建设工程";
                            }
                            else
                            {
                                specType = "其他";
                            }
                            if (buildUnit == "")
                            {
                                buildUnit = "";
                            }
                            msgType = "深职院";
                            bidType = ToolHtml.GetInviteTypes(bidType);
                            prjName = ToolDb.GetPrjName(prjName);
                            if (Encoding.Default.GetByteCount(code) > 50)
                            {
                                code = string.Empty;
                            }
                            if (Encoding.Default.GetByteCount(buildUnit) > 150)
                            {
                                buildUnit = string.Empty;
                            }
                            if (Encoding.Default.GetByteCount(bidUnit) > 150)
                            {
                                bidUnit = string.Empty;
                            }
                            if (Encoding.Default.GetByteCount(prjAddress) > 150)
                            {
                                prjAddress = string.Empty;
                            }
                            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate,
                                                             bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                             bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(null);
        }
Пример #20
0
        /// <summary>
        /// Walks every page of the listing grid at <c>SiteUrl</c>, downloads each entry's detail
        /// page, saves it as a <c>NotifyInfo</c> and, when the save reports success, saves the
        /// files listed in the detail page's download grid.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl ends once <c>MaxCount</c> entries have been handled.</param>
        /// <returns>Always null; entries are persisted through <c>ToolDb.SaveEntity</c> rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.dgzb.com.cn/DGJYWEB/SiteManage/";
            int totalPages = 1;
            int handled = 0;
            string listHtml = string.Empty;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return null;
            }

            // Page count: the text GetRegexBegEnd extracts from the pager label for "共"/"页".
            NodeList pagerNodes = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_GridViewPaingTwo1_lblGridViewPagingDesc")));
            if (pagerNodes != null && pagerNodes.Count > 0)
            {
                try
                {
                    totalPages = Convert.ToInt32(pagerNodes.AsString().GetRegexBegEnd("共", "页"));
                }
                catch
                {
                    totalPages = 1;
                }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                if (pageNo > 1)
                {
                    // Later pages are requested by posting the "go to page" form back to the
                    // same URL, using the hidden state fields of the page currently held.
                    try
                    {
                        string[] fieldNames =
                        {
                            "__VIEWSTATE",
                            "__EVENTVALIDATION",
                            "ctl00$cph_context$GridViewPaingTwo1$txtGridViewPagingForwardTo",
                            "ctl00$cph_context$GridViewPaingTwo1$btnForwardToPage"
                        };
                        string[] fieldValues =
                        {
                            this.ToolWebSite.GetAspNetViewState(listHtml),
                            this.ToolWebSite.GetAspNetEventValidation(listHtml),
                            pageNo.ToString(),
                            "GO"
                        };
                        NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                NodeList gridNodes = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_GridView1")));
                if (gridNodes == null || gridNodes.Count < 1)
                {
                    continue;
                }

                TableTag grid = gridNodes[0] as TableTag;
                // Row 0 is not read; iteration starts at the second row.
                for (int rowNo = 1; rowNo < grid.RowCount; rowNo++)
                {
                    TableRow row = grid.Rows[rowNo];
                    string title = row.Columns[1].ToNodePlainString();
                    string published = row.Columns[2].ToNodePlainString();
                    string detailUrl = siteRoot + row.Columns[1].GetATagHref();

                    // A failed download leaves the detail HTML empty; the lookup below then decides.
                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = ToolHtml.GetHtmlByUrl(detailUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                    }

                    NodeList contentNodes = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_span_MetContent")));
                    if (contentNodes == null || contentNodes.Count < 1)
                    {
                        continue;
                    }

                    string bodyHtml = contentNodes.AsHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");
                    string bodyText = contentNodes.AsString().Replace(" ", "").Replace("&nbsp;", "").Replace("\t\t", "\t").Replace("\t\t", "\t");
                    bodyText = Regex.Replace(bodyText, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace(" ", "").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                    // The information source is always passed as an empty string.
                    NotifyInfo notice = ToolDb.GenNotifyInfo(title, published, string.Empty, MsgTypeCosnt.DongGuanMsgType, detailUrl, bodyHtml, "广东省", "东莞市区", string.Empty, bodyText, "政策法规");

                    if (!crawlAll && handled >= this.MaxCount)
                    {
                        return null;
                    }
                    handled++;

                    if (!ToolDb.SaveEntity(notice, this.ExistCompareFields))
                    {
                        continue;
                    }

                    NodeList fileGrids = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_DownLoadFiles1_GridView1")));
                    if (fileGrids == null || fileGrids.Count < 1)
                    {
                        continue;
                    }

                    TableTag fileGrid = fileGrids[0] as TableTag;
                    for (int fileNo = 1; fileNo < fileGrid.RowCount; fileNo++)
                    {
                        TableRow fileRow = fileGrid.Rows[fileNo];
                        try
                        {
                            // Fall back to the entry title when the file row has no label text.
                            string label = fileRow.Columns[1].ToNodePlainString();
                            string attName = string.IsNullOrEmpty(label) ? title : label;
                            BaseAttach attach = ToolHtml.GetBaseAttachByUrl(siteRoot + fileRow.Columns[1].GetATagHref(), attName, notice.Id);
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: a failing attachment does not stop the remaining ones.
                        }
                    }
                }
            }
            return null;
        }
Пример #21
0
        /// <summary>
        /// Crawls the paged tender-notice list at <c>SiteUrl</c> and builds one
        /// <c>InviteInfo</c> per list entry from the entry's detail page.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops as soon as <c>MaxCount</c> entries have been collected.</param>
        /// <returns>The collected entries; an empty list when the first list page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string publisher = "深圳市龙华新区大浪街道办事处";

            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            // Both patterns are loop-invariant, so they are built once instead of per row.
            Regex regDate   = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "dlbsc-feiyeR")));

            if (sNode != null && sNode.Count > 0)
            {
                // Page count is taken from the pager text; any extraction or parse failure
                // falls back to a single page.
                try
                {
                    string temp = sNode.AsString().ToRegString().GetRegexBegEnd("/", "跳");

                    pageInt = int.Parse(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://dalang.szlhxq.gov.cn/dlbsc/zwgk73/cgzb10/zbgz/13891-" + i + ".html", Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "dlbsc_contUl")), true), new TagNameFilter("li")));

                if (viewList == null || viewList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < viewList.Count; j++)
                {
                    // These three are never filled for this site and are passed on empty.
                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    // NOTE(review): assumes GetATag() yields null for an item without a link;
                    // such an item is skipped so it cannot abort the whole crawl.
                    ATag aTag = viewList[j].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string beginDate = regDate.Match(viewList[j].ToNodePlainString()).Value;
                    string prjName   = aTag.LinkText;
                    string InfoUrl   = "http://dalang.szlhxq.gov.cn" + viewList[j].GetATagHref(0).Replace("./", "/");

                    string htmDtl = string.Empty;
                    try
                    {
                        htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                        htmDtl = regexHtml.Replace(htmDtl, "");
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "dlbsc-content")));
                    if (dtl == null || dtl.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt    = System.Text.RegularExpressions.Regex.Replace(dtl.ToHtml(), "(<script)[\\s\\S]*?(</script>)", "");
                    string inviteCtx  = HtmlTxt.ToCtxString();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();

                    if (string.IsNullOrEmpty(prjAddress))
                    {
                        prjAddress = "见招标信息";
                    }
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = publisher;
                    }

                    // Category and tender type are fixed for this site; the title is not used to classify.
                    string specType   = "建设工程";
                    string inviteType = ToolHtml.GetInviteType("小型工程");

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, publisher, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #22
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, saves each notice as a
        /// <c>NotifyInfo</c> and, when the save reports success, saves the attachment links
        /// found inside the notice body.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl ends once <c>MaxCount</c> notices have been handled.</param>
        /// <returns>Always null; notices are persisted through <c>ToolDb.SaveEntity</c> rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "Pages")));

            if (pageNode != null && pageNode.Count > 0)
            {
                // Keep the default of one page unless the pager text parses as an integer.
                string temp = pageNode.AsString().GetRegexBegEnd("共有", "页");
                int    parsedPages;
                if (int.TryParse(temp, out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?page=" + i);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "News_list")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode  node        = listNode[j];
                    string releaseTime = node.ToPlainTextString().GetDateRegex();

                    // NOTE(review): assumes GetATag() yields null for an item without a link;
                    // such an item is skipped so it cannot abort the whole crawl.
                    ATag aTag = node.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    string headName = aTag.GetAttribute("title");
                    string infoUrl  = "http://cgzx.baoan.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "Scroller-1")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = dtlNode.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = "深圳市宝安区政府采购中心";
                    // The information source is always passed as an empty string.
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, string.Empty, msgType, infoUrl, ctxHtml, "广东省", "深圳政府采购", "宝安区", infoCtx, "通知公告");

                    // Test the limit before counting this notice, so that exactly MaxCount
                    // notices are saved rather than MaxCount - 1.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    sqlCount++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag fileATag = aNode[k].GetATag();
                        if (!fileATag.IsAtagAttach())
                        {
                            continue;
                        }

                        // NOTE(review): relative links are resolved against ba.szzfcg.cn, not the
                        // cgzx.baoan.gov.cn host used for the detail pages - confirm this is intended.
                        string fileUrl = fileATag.Link.ToLower().Contains("http")
                            ? fileATag.Link
                            : "http://ba.szzfcg.cn" + fileATag.Link;

                        BaseAttach obj = ToolHtml.GetBaseAttach(fileUrl, headName, info.Id);
                        if (obj != null)
                        {
                            ToolDb.SaveEntity(obj, string.Empty);
                        }
                    }
                }
            }
            return(null);
        }
Пример #23
0
        /// <summary>
        /// Reads the award table (the table with width "749") from the single page at
        /// <c>SiteUrl</c> and turns each data row into a <c>BidInfo</c>.
        /// </summary>
        /// <param name="crawlAll">When false, stops as soon as <c>MaxCount</c> rows have been collected.</param>
        /// <returns>The collected entries; empty when the page cannot be downloaded or the table is missing.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results  = new ArrayList();
            string pageHtml = string.Empty;
            string markup   = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception)
            {
                return results;
            }

            NodeList tables = new Parser(new Lexer(pageHtml)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "749")));
            if (tables == null || tables.Count <= 0)
            {
                return results;
            }

            markup = tables.AsHtml();
            TableTag grid = tables[0] as TableTag;

            // Rows are read from index 13 up to, but not including, the last three rows.
            for (int rowIndex = 13; rowIndex < grid.RowCount - 3; rowIndex++)
            {
                TableRow row     = grid.Rows[rowIndex];
                string   rowText = row.ToPlainTextString();

                if (rowText.Contains("注:"))
                {
                    continue;
                }
                if (rowText.Contains("中标通知书"))
                {
                    // Skip this row and the one directly after it.
                    rowIndex++;
                    continue;
                }

                string code      = row.Columns[2].ToPlainTextString().Trim();
                string prjName   = row.Columns[3].ToPlainTextString().Trim();
                string bidUnit   = row.Columns[4].ToPlainTextString().Trim();
                string bidItem   = row.Columns[5].ToPlainTextString().Trim();
                string beginDate = row.Columns[7].ToPlainTextString().Trim();
                string infoUrl   = "http://www.ymcw.com/message2.htm";

                // The HTML form carries the awarded item as well; the plain-text form omits it.
                markup = string.Format("<p>招标编号:{0}<br/>项目名称:{1}<br/>中标单位:{2}<br/>中标项目:{3}<br/>中标时间:{4}<br/></p>", code, prjName, bidUnit, bidItem, beginDate);
                string bidCtx = string.Format("招标编号:{0}\r\n项目名称:{1}\r\n中标单位:{2}\r\n中标时间:{3}\r\n", code, prjName, bidUnit, beginDate);

                // The bid type is derived from the raw project name, before it is normalised.
                string bidType = ToolHtml.GetInviteTypes(prjName);
                prjName = ToolDb.GetPrjName(prjName);
                if (prjName.Contains("深圳市人民检察院电子物证设备"))
                {
                    continue;
                }

                BidInfo entry = ToolDb.GenBidInfo(
                    "广东省", "深圳社会招标", "", string.Empty, code, prjName, string.Empty, beginDate,
                    bidUnit, beginDate, string.Empty, bidCtx, string.Empty, "深圳市裕明财务咨询有限公司", bidType, "其他", string.Empty,
                    string.Empty, infoUrl, string.Empty, markup);
                results.Add(entry);

                if (!crawlAll && results.Count >= this.MaxCount)
                {
                    return results;
                }
            }
            return results;
        }
Пример #24
0
        /// <summary>
        /// Builds the attachment records described by <paramref name="htmltxt"/> and saves them.
        /// Text containing "http" is treated as HTML whose anchor tags are the attachments;
        /// anything else is handed to <c>ToolHtml.JsonToDataTable</c> and read as rows with
        /// "attachName" and "attachGuid" columns.
        /// </summary>
        /// <param name="info">Project the attachments belong to; its name is the fallback attachment name.</param>
        /// <param name="htmltxt">HTML fragment or JSON text describing the attachments. Null or empty does nothing.</param>
        /// <param name="result">Source id used instead of <c>info.Id</c> when <paramref name="isUpdate"/> is true.</param>
        /// <param name="isUpdate">When true, attachments already stored for <paramref name="result"/> are deleted before the new ones are saved.</param>
        private void SaveAttach(BidProject info, string htmltxt, string result, bool isUpdate)
        {
            if (string.IsNullOrEmpty(htmltxt))
            {
                return;
            }

            const string siteRoot  = "https://www.szjsjy.com.cn:8001/";
            const string attachDir = "SiteManage\\Files\\Attach\\";

            List <BaseAttach> list = new List <BaseAttach>();

            if (htmltxt.Contains("http"))
            {
                Parser   parser = new Parser(new Lexer(htmltxt));
                NodeList aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int j = 0; j < aNode.Count; j++)
                    {
                        ATag   aTag       = aNode[j].GetATag();
                        string attachName = string.IsNullOrWhiteSpace(aTag.LinkText) ? info.PrjName : aTag.LinkText;
                        string link       = aTag.Link.GetReplace("\\");
                        string aurl       = aTag.Link.ToLower().Contains("http") ? link : siteRoot + link;
                        try
                        {
                            // Only links with at least three '&'-separated parts are handled;
                            // the second and third parts are swapped before downloading.
                            string[] urls = System.Web.HttpUtility.UrlDecode(aurl).Split('&');
                            if (urls.Length < 3)
                            {
                                continue;
                            }
                            string url = (urls[0] + "&" + urls[2] + "&" + urls[1]).Replace("\"", "");

                            BaseAttach entity = isUpdate
                                ? ToolHtml.GetBaseAttach(url, attachName, result, attachDir)
                                : ToolHtml.GetBaseAttach(url, attachName, info.Id, attachDir);
                            if (entity != null)
                            {
                                list.Add(entity);
                            }
                        }
                        catch
                        {
                            // Best effort: a failing download must not block the other attachments.
                        }
                    }
                }
            }
            else
            {
                System.Data.DataTable dtlDtl = ToolHtml.JsonToDataTable(htmltxt);

                if (dtlDtl != null && dtlDtl.Rows.Count > 0)
                {
                    for (int i = 0; i < dtlDtl.Rows.Count; i++)
                    {
                        System.Data.DataRow row = dtlDtl.Rows[i];
                        string attachName       = Convert.ToString(row["attachName"]);
                        if (string.IsNullOrWhiteSpace(attachName))
                        {
                            attachName = info.PrjName;
                        }
                        string attachGuid = Convert.ToString(row["attachGuid"]);
                        string url        = siteRoot + "file/downloadFile?fileId=" + attachGuid;
                        try
                        {
                            BaseAttach entity = isUpdate
                                ? ToolHtml.GetBaseAttachByUrl(url, attachName, result, attachDir)
                                : ToolHtml.GetBaseAttachByUrl(url, attachName, info.Id, attachDir);
                            if (entity != null)
                            {
                                list.Add(entity);
                            }
                        }
                        catch
                        {
                            // Best effort: a failing download must not block the other attachments.
                        }
                    }
                }
            }

            if (list.Count == 0)
            {
                // Nothing new was fetched, so existing attachments are left in place.
                return;
            }

            if (isUpdate)
            {
                // The id is embedded in a SQL string literal, so single quotes are doubled
                // to keep it from terminating the literal.
                string safeId = (result ?? string.Empty).Replace("'", "''");
                string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", safeId);
                ToolFile.Delete(result);
                ToolDb.ExecuteSql(delSql);
            }
            foreach (BaseAttach attach in list)
            {
                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
            }
        }
Пример #25
0
        /// <summary>
        /// Crawls the notice list found at <c>SiteUrl</c>, persists every notice through
        /// <c>ToolDb.SaveEntity</c> and, for each notice that was saved, also persists the
        /// images and attachment links contained in its detail content.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> notices have been processed.
        /// </param>
        /// <returns>
        /// Always <c>null</c>: entities are written directly to storage instead of being returned.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt  = 1;
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // The list page could not be downloaded; nothing to crawl.
                return(null);
            }

            // The page count is read from the text between parentheses in the last pager link.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList[pageList.Count - 1].GetATagValue().Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }

            // NOTE(review): html is never re-fetched inside this loop, so each iteration
            // parses the first page again. The paging request for this site is not visible
            // here, so the behaviour is left unchanged - confirm and add the page fetch.
            for (int i = 1; i <= pageInt; i++)
            {
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row of the table are skipped.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are required below; skip malformed rows.
                        continue;
                    }

                    string infoScorce  = string.Empty;
                    string infoType    = "办事指南";
                    string headName    = tr.Columns[1].ToNodePlainString();
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl     = tr.Columns[1].GetATagHref();

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString().Replace("<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />", "");
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "context_div")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = dtlList.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = MsgTypeCosnt.ZhongShanMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "中山市区", string.Empty, infoCtx, infoType);

                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    sqlCount++;

                    // Images and attachments are only stored when the notice itself was saved.
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgList != null)
                    {
                        for (int img = 0; img < imgList.Count; img++)
                        {
                            ImageTag imgTag = imgList[img] as ImageTag;
                            if (imgTag == null)
                            {
                                continue;
                            }
                            BaseAttach baseInfo = ToolHtml.GetBaseAttachByUrl(imgTag.GetAttribute("src"), headName, info.Id);
                            if (baseInfo != null)
                            {
                                ToolDb.SaveEntity(baseInfo, string.Empty);
                            }
                        }
                    }

                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList attachList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (attachList != null)
                    {
                        for (int a = 0; a < attachList.Count; a++)
                        {
                            ATag aTag = attachList[a] as ATag;
                            if (aTag == null || !aTag.IsAtagAttach())
                            {
                                continue;
                            }
                            BaseAttach obj = ToolHtml.GetBaseAttachByUrl(aTag.Link, aTag.LinkText, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                    }
                }
            }
            return(null);
        }
Пример #26
0
        /// <summary>
        /// Crawls the project list table at <c>SiteUrl</c> page by page, downloads each
        /// project's detail page and builds an <c>InviteInfo</c> from the list row and the
        /// detail text.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> items; empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dnn_ctr395_ProjectList_pager")));

            if (nodeList != null && nodeList.Count > 0)
            {
                // The digit directly in front of the "next page" link is taken as the page count.
                // NOTE(review): the pattern captures a single digit only - confirm how the
                // pager renders when there are ten or more pages.
                Regex  regDate  = new Regex(@"\d下一页");
                string pageText = regDate.Match(nodeList.AsString().Trim()).ToString().Replace("下一页", "").Trim();
                int    parsedPage;
                // When the pager text does not match, keep the default of one page instead of throwing.
                if (int.TryParse(pageText, out parsedPage))
                {
                    page = parsedPage;
                }
            }

            // These patterns do not depend on the row, so they are built once.
            Regex regBuidUnit = new Regex(@"(招标单位|招标人):[^\r\n]+\r\n");
            Regex regPrjAddr  = new Regex(@"(工程地点|工程地址|工程建设地点)(:|:)[^\r\n]+\r\n");

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&page=" + i.ToString()), Encoding.Default);
                    }
                    catch (Exception) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dnn_ctr395_ProjectList_grdData")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = tableNodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped.
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 5)
                    {
                        // Columns 0, 1, 3 and 4 are read below; skip malformed rows.
                        continue;
                    }

                    string remark    = string.Empty;
                    string otherType = string.Empty;

                    string code    = tr.Columns[0].ToPlainTextString().Trim();
                    string prjName = tr.Columns[1].ToPlainTextString().Trim();

                    // Only the first ten characters of the date cells are kept; shorter
                    // cell text is used as-is instead of throwing.
                    string endText   = tr.Columns[4].ToPlainTextString().Replace("&nbsp; ", "").Trim();
                    string beginText = tr.Columns[3].ToPlainTextString().Replace("&nbsp; ", "").Trim();
                    string endDate   = endText.Length > 10 ? endText.Substring(0, 10) : endText;
                    string beginDate = beginText.Length > 10 ? beginText.Substring(0, 10) : beginText;

                    NodeList anchors = tr.Columns[1].SearchFor(typeof(ATag), true);
                    ATag     aTag    = (anchors != null && anchors.Count > 0) ? anchors[0] as ATag : null;
                    if (aTag == null)
                    {
                        // No link to a detail page in this row.
                        continue;
                    }

                    string InfoUrl    = "http://zb.zjcic.net" + aTag.Link.Replace("amp;", "").Trim();
                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("&nbsp;", "");
                    }
                    catch (Exception)
                    {
                        Logger.Error("InviteZhJianJS");
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "dnn_ctr408_ProjectView_INSTRUCTION")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtnode.AsHtml();
                    string inviteCtx = dtnode.AsString().Replace("&#160;", "").Trim();
                    string buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("招标单位:", "").Replace(":", "").Replace("&#160;", "").Trim();

                    string prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址", "").Replace("工程建设地点", "").Replace(":", "").Trim();
                    if (prjAddress == "")
                    {
                        prjAddress = "见招标信息";
                    }

                    string msgType    = "湛江市建设工程交易中心";
                    string specType   = "建设工程";
                    string inviteType = ToolHtml.GetInviteTypes(prjName);

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "湛江市区", "",
                                                           string.Empty, code, prjName, prjAddress, buildUnit,
                                                           beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #27
0
        /// <summary>
        /// Crawls the bid result list of every entry returned by <c>GetCitys()</c>, downloads
        /// each item's detail page, extracts the bid fields from the detail text (falling back
        /// to the first table in the detail HTML when no bid unit is found) and builds a
        /// <c>BidInfo</c> per item. Attachment links found in the detail HTML are added to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling of a city stops once <c>MaxCount</c> items were collected for it.
        /// </param>
        /// <returns>The collected <c>BidInfo</c> items of all crawled cities.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <BidInfo>();
            Dictionary <string, string> citys = this.GetCitys();

            foreach (KeyValuePair <string, string> city in citys)
            {
                string area      = city.Key;
                string cityUrl   = city.Value;
                int    count     = 0;
                int    pageInt   = 1;
                string html      = string.Empty;
                string cookiestr = string.Empty;
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(cityUrl, Encoding.UTF8, ref cookiestr);
                }
                // NOTE(review): a failed download for one city ends the whole crawl and the
                // remaining cities are not visited - confirm this is intended.
                catch { return(list); }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
                if (pageNode != null && pageNode.Count > 0)
                {
                    try
                    {
                        string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                        pageInt = int.Parse(temp);
                    }
                    catch { }
                }

                for (int i = 1; i <= pageInt; i++)
                {
                    if (i > 1)
                    {
                        // Later pages are requested by posting the form state of the previous page.
                        string viewState          = this.ToolWebSite.GetAspNetViewState(html);
                        string eventValidation    = this.ToolWebSite.GetAspNetEventValidation(html);
                        string viewSTATEGENERATOR = ToolHtml.GetHtmlInputValue(html, "__VIEWSTATEGENERATOR");
                        NameValueCollection nvc   = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "__VIEWSTATE",
                            "__VIEWSTATEGENERATOR",
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__EVENTVALIDATION",
                            "MoreInfoList1$txtTitle"
                        },
                                                                                            new string[] {
                            viewState,
                            viewSTATEGENERATOR,
                            "MoreInfoList1$Pager",
                            i.ToString(),
                            eventValidation,
                            ""
                        });
                        try
                        {
                            html = this.ToolWebSite.GetHtmlByUrl(cityUrl, nvc, Encoding.UTF8, ref cookiestr);
                        }
                        catch { continue; }
                    }

                    parser = new Parser(new Lexer(html));
                    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                    if (listNode == null || listNode.Count == 0)
                    {
                        continue;
                    }

                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }

                    // Row 0 is skipped.
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount < 3)
                        {
                            // Columns 1 and 2 are read below; skip malformed rows.
                            continue;
                        }

                        ATag aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }

                        string endDate   = string.Empty;
                        string otherType = string.Empty;

                        string prjName   = aTag.GetAttribute("title").GetReplace("【正在报名】,【报名结束】");
                        string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        string InfoUrl   = "http://www.gxzbtb.cn" + aTag.Link;

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }

                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode == null || dtlNode.Count == 0)
                        {
                            continue;
                        }

                        string HtmlTxt    = dtlNode.AsHtml();
                        string bidCtx     = HtmlTxt.GetReplace(new string[] { "<br/>", "<br />", "<br>" }, "\r\n").ToCtxString();
                        string prjAddress = bidCtx.GetAddressRegex();
                        string buildUnit  = bidCtx.GetBuildRegex();
                        string bidUnit    = bidCtx.GetBidRegex();
                        string bidMoney   = bidCtx.GetMoneyRegex();
                        string prjMgr     = bidCtx.GetMgrRegex();
                        string code       = bidCtx.GetCodeRegex().GetCodeDel();

                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            // Fallback: rebuild "label:value" lines from the first table of
                            // the detail HTML and run the same extractors on that text.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList bidNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            TableTag bidTable = (bidNode != null && bidNode.Count > 0) ? bidNode[0] as TableTag : null;
                            if (bidTable != null)
                            {
                                // First attempt: cells alternate label, value within a row.
                                StringBuilder pairText = new StringBuilder();
                                for (int r = 0; r < bidTable.RowCount; r++)
                                {
                                    for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                                    {
                                        pairText.Append(bidTable.Rows[r].Columns[c].ToNodePlainString());
                                        pairText.Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                                    }
                                }
                                string ctx = pairText.ToString();

                                bidUnit = ctx.GetBidRegex() ?? string.Empty;
                                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                {
                                    bidMoney = ctx.GetMoneyString().GetMoney("万元");
                                }
                                if (string.IsNullOrEmpty(prjAddress))
                                {
                                    prjAddress = ctx.GetAddressRegex();
                                }
                                if (string.IsNullOrEmpty(buildUnit))
                                {
                                    buildUnit = ctx.GetBuildRegex();
                                }
                                if (string.IsNullOrEmpty(code))
                                {
                                    code = ctx.GetCodeRegex().GetCodeDel();
                                }
                                if (bidUnit.Contains("推荐") || bidUnit.Contains("中标") || bidUnit.Contains("地址"))
                                {
                                    bidUnit = string.Empty;
                                }

                                if (string.IsNullOrEmpty(bidUnit) && bidTable.RowCount > 1)
                                {
                                    // Second attempt: row 0 holds the labels, row 1 the values.
                                    StringBuilder headerText = new StringBuilder();
                                    for (int d = 0; d < bidTable.Rows[0].ColumnCount; d++)
                                    {
                                        headerText.Append(bidTable.Rows[0].Columns[d].ToNodePlainString());
                                        headerText.Append(":");
                                        // Row 1 may have fewer cells than row 0.
                                        if (d < bidTable.Rows[1].ColumnCount)
                                        {
                                            headerText.Append(bidTable.Rows[1].Columns[d].ToNodePlainString());
                                            headerText.Append("\r\n");
                                        }
                                    }
                                    ctx = headerText.ToString();

                                    bidUnit = ctx.GetBidRegex();
                                    if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                    {
                                        bidMoney = ctx.GetMoneyString().GetMoney();
                                    }
                                    if (string.IsNullOrEmpty(prjAddress))
                                    {
                                        prjAddress = ctx.GetAddressRegex();
                                    }
                                    if (string.IsNullOrEmpty(buildUnit))
                                    {
                                        buildUnit = ctx.GetBuildRegex();
                                    }
                                    if (string.IsNullOrEmpty(code))
                                    {
                                        code = ctx.GetCodeRegex().GetCodeDel();
                                    }
                                }
                            }
                        }

                        // Amounts above 10000 are divided by 10000; unparsable amounts are left untouched.
                        decimal money;
                        if (decimal.TryParse(bidMoney, out money) && money > 10000)
                        {
                            bidMoney = (money / 10000).ToString();
                        }

                        // Guard against a null result from the extractors before cleaning it up.
                        bidUnit = (bidUnit ?? string.Empty).Replace("名称", "").Replace("单位", "").Replace("№", "").Replace("1", "").Replace("2", "").Replace("联合体", "").Replace("(", "");

                        // Cut everything after the organisation suffix.
                        if (bidUnit.Contains("公司"))
                        {
                            bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                        }
                        if (bidUnit.Contains("研究院"))
                        {
                            bidUnit = bidUnit.Remove(bidUnit.IndexOf("研究院")) + "研究院";
                        }
                        if (bidUnit.Contains("研究所"))
                        {
                            bidUnit = bidUnit.Remove(bidUnit.IndexOf("研究所")) + "研究所";
                        }

                        string bidType  = "水利工程";
                        string specType = "建设工程";
                        string msgType  = "广西壮族自治区公共资源交易中心";
                        BidInfo info = ToolDb.GenBidInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                        list.Add(info);
                        count++;

                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k] as ATag;
                                if (a == null || !a.IsAtagAttach())
                                {
                                    continue;
                                }

                                string link;
                                if (a.Link.ToLower().Contains("http"))
                                {
                                    link = a.Link;
                                }
                                else
                                {
                                    link = "http://www.gxzbtb.cn/" + a.Link.GetReplace("../,./");
                                }
                                // Links longer than 500 bytes are not recorded.
                                if (Encoding.Default.GetByteCount(link) > 500)
                                {
                                    continue;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }

                        if (!crawlAll && count >= this.MaxCount)
                        {
                            // Leave both nested loops and move on to the next city.
                            goto Funcs;
                        }
                    }
                }
                Funcs :;
            }
            return(list);
        }
Пример #28
0
        /// <summary>
        /// Crawls the notice list table at <c>SiteUrl</c> page by page, downloads each notice's
        /// detail page and builds a <c>NoticeInfo</c> from the list row and the detail content.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>NoticeInfo</c> items; empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch (Exception)
            {
                return(list);
            }

            // The page count is read from the goPage(n) call of the last pager link.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "toptd1")), true), new TagNameFilter("a")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList[pageList.Count - 1].GetATagValue();
                    pageInt = Convert.ToInt32(temp.Replace("javascript:goPage(", "").Replace(")", ""));
                }
                catch { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the hidden form values of the
                    // previous page together with the wanted page number.
                    try
                    {
                        string typeId           = ToolHtml.GetHtmlInputValue(html, "typeId");
                        string boardId          = ToolHtml.GetHtmlInputValue(html, "boardId");
                        string totalRows        = ToolHtml.GetHtmlInputValue(html, "totalRows");
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                            new string[] {
                            "typeId", "boardId", "newstitle", "sTime", "eTime", "totalRows", "pageNO"
                        },
                            new string[] { typeId, boardId, "", "", "", totalRows, i.ToString() }
                            );
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row of the table are skipped.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; skip malformed rows.
                        continue;
                    }

                    string prjCode   = string.Empty;
                    string buildUnit = string.Empty;

                    string InfoTitle = tr.Columns[1].ToNodePlainString();
                    string endDate   = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoType  = "资格预审";
                    string InfoUrl   = tr.Columns[1].GetATagHref();

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "context_div")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    // The detail HTML is rendered once and reused for the plain-text content.
                    string htmlTxt = dtlList.ToHtml();
                    string InfoCtx = htmlTxt.ToCtxString().Replace("<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />", "");

                    // Publish date: a "yyyy年MM月dd日" date in the content first, then any date
                    // in the content, finally the date taken from the list row.
                    string PublistTime = InfoCtx.GetDateRegex("yyyy年MM月dd日").Replace("年", "-").Replace("月", "-").Replace("日", "");
                    if (string.IsNullOrEmpty(PublistTime))
                    {
                        PublistTime = InfoCtx.GetDateRegex();
                    }
                    if (string.IsNullOrEmpty(PublistTime))
                    {
                        PublistTime = endDate;
                    }

                    NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "惠州市区", "龙门县", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, MsgTypeCosnt.HuiZhouMsgType, InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, string.Empty, string.Empty, htmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #29
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, opens the detail page linked from
        /// each row of the <c>table#dlstNews</c> list table and builds one <c>InviteInfo</c>
        /// per row via <c>ToolDb.GenInviteInfo</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items. The list is empty when the first list page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            int    pageInt   = 1; // total page count; stays 1 when the pager text cannot be parsed
            string html      = string.Empty;
            string viewState = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception)
            {
                // Best effort: without the first list page there is nothing to crawl.
                return(list);
            }

            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "anp1")));

            if (tdNodes == null)
            {
                return(list);
            }

            // The pager text matched below is split on '/', and the part after the slash
            // is taken as the page count. Anything unparsable leaves pageInt at 1.
            string   pageTemp   = tdNodes.AsString().Replace("&nbsp;", "").Trim();
            string[] pagerParts = new Regex(@"当前第[^页]+页").Match(pageTemp).Value.Split('/');
            int      parsedPages;
            if (pagerParts.Length > 1 && int.TryParse(pagerParts[1].Replace("页", "").Trim(), out parsedPages))
            {
                pageInt = parsedPages;
            }

            // Loop-invariant regexes, built once instead of once per page / per row.
            Regex regHTML1  = new Regex(@"<td>[^<]+<td>");
            Regex regHTML2  = new Regex(@"</td>[^<]+</td>");
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            // NOTE(review): the outer parentheses are a regex group, not literal brackets;
            // presumably bracketed text was meant to be matched - confirm against the site.
            Regex regcode   = new Regex(@"(招标编号:[^)]+)");

            string cookiestr = string.Empty;
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the pager form back,
                    // using the view state taken from the previously loaded page.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "anp1_input",
                        "Columnlist2_DepartmentTreeView_CheckedList",
                        "Columnlist2_DepartmentTreeView_EditEvents",
                        "Columnlist2_DepartmentTreeView_ExpandedList",
                        "Columnlist2_DepartmentTreeView_MoveEvents",
                        "Columnlist2_DepartmentTreeView_MultipleSelectedList",
                        "Columnlist2_DepartmentTreeView_ScrollData",
                        "Columnlist2_DepartmentTreeView_SelectedNode",
                        "Columnlist2_DepartmentTreeView_ValueChangeEvents",
                        "Login2:txtPassword",
                        "Login2:txtUserName",
                        "__EVENTARGUMENT",
                        "__EVENTTARGET",
                        "__VIEWSTATE"
                    }, new string[] {
                        (i - 1).ToString(),
                        string.Empty,
                        string.Empty,
                        string.Empty,
                        string.Empty,
                        string.Empty,
                        "0,0",
                        "p_379",
                        string.Empty,
                        string.Empty,
                        string.Empty,
                        i.ToString(),
                        "anp1",
                        viewState
                    });

                    try { html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr); }
                    catch (Exception) { continue; } // skip a page that fails to load
                }

                // Collapse "<td>text<td>" and "</td>text</td>" runs before parsing the list table.
                html   = regHTML2.Replace(regHTML1.Replace(html, "<td>"), "</td>");
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dlstNews")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                    TableRow tr = table.Rows[j];

                    // A row without both columns or without a link cannot be turned into an
                    // item; skip it instead of letting an exception abort the whole crawl.
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }
                    NodeList links = tr.Columns[0].SearchFor(typeof(ATag), true);
                    if (links == null || links.Count == 0)
                    {
                        continue;
                    }
                    ATag aTag = links[0] as ATag;
                    if (aTag == null)
                    {
                        continue;
                    }

                    beginDate = tr.Columns[1].ToPlainTextString().Trim();
                    InfoUrl   = "http://www.szgxzb.com/zbgg/" + aTag.Link;

                    string htmldetail = string.Empty;
                    try
                    {
                        // The detail page is downloaded once and reused for both the raw
                        // HTML snapshot (HtmlTxt) and the plain-text extraction below.
                        string   rawDetail     = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "");
                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "newsContent"), new TagNameFilter("div")));
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        htmldetail = regexHtml.Replace(rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n"), "");
                    }
                    catch (Exception) { continue; } // skip a row whose detail page fails to load

                    Parser   dtlparser   = new Parser(new Lexer(htmldetail));
                    NodeList prjNameNode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("title"));
                    prjName = prjNameNode.AsString().Replace("国信招标--", "");
                    dtlparser.Reset();
                    NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "newsContent"), new TagNameFilter("div")));

                    inviteCtx = dtnode.AsString();
                    code      = regcode.Match(inviteCtx).Value.Replace("招标编号", "").Replace("(", "").Replace(")", "").Replace(":", "").Trim();
                    if (Encoding.Default.GetByteCount(code) > 50)
                    {
                        // A match longer than 50 bytes is discarded rather than stored.
                        code = "";
                    }
                    specType   = "其他";
                    msgType    = "深圳市国信招标有限公司";
                    inviteType = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Пример #30
0
        /// <summary>
        /// Crawls the paged list starting at <c>SiteUrl</c>, opens the detail page of each
        /// list item and builds one <c>BidInfo</c> per item whose detail page contains a
        /// <c>div#DivContent</c> element.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items. The list is empty when the first list page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Best effort: without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_page")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The page count is read from the text between "HTML" and the first
                    // comma of the pager block (the comma is swapped for a marker first).
                    string pagerText = sNode.AsString().Replace(",", "kdcc");
                    string pageCount = pagerText.GetRegexBegEnd("HTML", "kdcc").Replace("(", "");
                    pageInt = Convert.ToInt32(pageCount);
                }
                catch { pageInt = 1; }
            }

            // Loop-invariant regexes, built once instead of once per item.
            // NOTE(review): the "(:|:)" alternations list the same colon twice; presumably
            // one of them was a full-width colon originally - confirm against the site.
            Regex regDate     = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
            Regex regexHtml   = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");
            Regex regPrjCode  = new Regex(@"(工程编号|项目编号|招标编号|中标编号|编号)(:|:)[^\r\n]+\r\n");
            Regex regBuidUnit = new Regex(@"(建设单位|招标人|承包人|招标单位|招标方|招标代理机构)(:|:)[^\r\n]+\r\n");
            Regex regMoney    = new Regex(@"(中标价|投标价|总投资|发包价|投标报价|价格|金额)(:|:|)[^\r\n]+\r\n");
            Regex regBidUnit  = new Regex(@"(第一候选人|中标候选人|中标单位|中标人|中标方)(:|:)[^\r\n]+\r\n");
            Regex regprjMgr   = new Regex(@"(项目经理姓名|项目经理|项目负责人|项目总监|建造师|总工程师|监理师)(:|:)[^\r\n]+\r\n");
            Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://xajdb.baoan.gov.cn/xxgk_11984/ywxx/zbcg/zbxxgg/index_" + (i - 1).ToString() + ".html", Encoding.UTF8);
                    }
                    catch
                    {
                        continue; // skip a page that fails to load
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "right_list"))), new TagNameFilter("ul"))), new TagNameFilter("li")));
                if (viewList == null || viewList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < viewList.Count; j++)
                {
                    string prjName = string.Empty,
                           buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty,
                           specType = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    string itemText = viewList[j].ToPlainTextString().Trim();
                    beginDate = regDate.Match(itemText).Value;
                    // string.Replace throws ArgumentException for an empty search string,
                    // so the date is only stripped when one was actually found.
                    string temp = beginDate.Length > 0 ? itemText.Replace(beginDate, "") : itemText;
                    try
                    {
                        // NOTE(review): the previous code ran Replace("", ...) whenever the
                        // item had no usable link; that call always throws, so such items
                        // were skipped through the catch below. The skip is explicit here.
                        // Confirm that skipping (rather than a fallback) is what is wanted.
                        ATag atag = viewList[j].ToHtml().GetATag();
                        if (atag == null || string.IsNullOrWhiteSpace(atag.Link))
                        {
                            continue;
                        }

                        // Cut the item text down to the <a ...>...</a> that follows "else".
                        // Any missing marker makes Substring throw, which skips the item.
                        int beg = temp.IndexOf("else"), end = temp.Length;
                        temp    = temp.Substring(beg, end - beg);
                        beg     = temp.IndexOf("<a");
                        end     = temp.IndexOf("/a>");
                        temp    = temp.Substring(beg, (end - beg) + 3);
                        beg     = temp.IndexOf(">");
                        end     = temp.IndexOf("</");
                        prjName = temp.Substring(beg + 1, end - beg - 1);
                        Parser   p    = new Parser(new Lexer(temp));
                        NodeList l    = p.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        ATag     aTag = l.SearchFor(typeof(ATag), true)[0] as ATag;
                        InfoUrl = "http://xajdb.baoan.gov.cn/xxgk_11984/ywxx/zbcg/zbxxgg/" + aTag.Link.Replace("../", "").Replace("./", "");
                    }
                    catch { continue; }

                    string htmDtl = string.Empty;
                    try
                    {
                        htmDtl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8);
                        htmDtl = Regex.Replace(htmDtl, "(<script)[\\s\\S]*?(</script>)", "");
                        htmDtl = regexHtml.Replace(htmDtl, "");
                    }
                    catch { continue; } // skip an item whose detail page fails to load

                    parser = new Parser(new Lexer(htmDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "DivContent")));
                    if (dtl == null || dtl.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = Regex.Replace(dtl.AsHtml(), "(<script)[\\s\\S]*?(</script>)", "");
                    parser  = new Parser(new Lexer(HtmlTxt.ToLower().Replace("th", "td")));
                    NodeList dtlTab = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (dtlTab != null && dtlTab.Count > 0)
                    {
                        bidCtx = FlattenLabelValueTable(dtlTab[0] as TableTag, false);
                    }
                    else
                    {
                        // No table: strip the markup and keep the text, one line per <br>.
                        bidCtx = System.Text.RegularExpressions.Regex.Replace(HtmlTxt, "(<script)[\\s\\S]*?(</script>)", "");
                        bidCtx = Regex.Replace(bidCtx.Replace("<BR/>", "\r\n").Replace("<br/>", "\r\n").Replace("<BR>", "\r\n").Replace("<br>", "\r\n"), "<[^>]*>", "").Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\r\n", "\r\n") + "\r\n";
                    }

                    string compactCtx = bidCtx.Replace(" ", "");
                    code      = ExtractLabeledValue(regPrjCode, compactCtx, "工程编号", "项目编号", "招标编号", "中标编号", "编号");
                    buildUnit = ExtractLabeledValue(regBuidUnit, compactCtx, "招标代理机构", "建设单位", "招标人", "承包人", "招标单位", "招标方");
                    bidMoney  = ExtractLabeledValue(regMoney, compactCtx, "中标价", "总投资", "发包价", "投标报价", "投标价", "价格", "金额");
                    bidUnit   = ExtractLabeledValue(regBidUnit, compactCtx, "中标候选人", "第一候选人", "中标单位", "中标人", "中标方");
                    prjMgr    = ExtractLabeledValue(regprjMgr, compactCtx, "项目经理姓名", "总工程师", "项目经理", "项目总监", "建造师", "监理师", "项目负责人");

                    if (bidMoney.Contains("万"))
                    {
                        // Text before "万" is kept as the amount without conversion.
                        bidMoney = bidMoney.Remove(bidMoney.IndexOf("万")).Trim();
                        bidMoney = regBidMoney.Match(bidMoney).Value;
                    }
                    else
                    {
                        // Otherwise the number is divided by 10000; results below 0.1 and
                        // unparsable text both become "0".
                        decimal amount;
                        if (decimal.TryParse(regBidMoney.Match(bidMoney).Value, out amount))
                        {
                            decimal scaled = amount / 10000;
                            bidMoney = scaled < 0.1m ? "0" : scaled.ToString();
                        }
                        else
                        {
                            bidMoney = "0";
                        }
                    }
                    if (prjMgr.Contains("资格"))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("资格"));
                    }

                    if (string.IsNullOrEmpty(bidUnit) && string.IsNullOrEmpty(buildUnit))
                    {
                        // Second attempt: re-parse with </p> and <br> turned into line
                        // breaks, keeping line breaks inside cells, then retry both units.
                        parser = new Parser(new Lexer(HtmlTxt.ToLower().Replace("th", "td").Replace("</p>", "\r\n").Replace("<br/>", "\r\n").Replace("<br>", "\r\n")));
                        dtlTab = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        if (dtlTab != null && dtlTab.Count > 0)
                        {
                            bidCtx = FlattenLabelValueTable(dtlTab[0] as TableTag, true);
                        }
                        compactCtx = bidCtx.Replace(" ", "");
                        buildUnit  = ExtractLabeledValue(regBuidUnit, compactCtx, "招标代理机构", "建设单位", "招标人", "承包人", "招标单位", "招标方");
                        bidUnit    = ExtractLabeledValue(regBidUnit, compactCtx, "中标候选人", "第一候选人", "中标单位", "中标人", "中标方");
                    }
                    buildUnit = ToolHtml.GetSubString(buildUnit, 150);
                    bidUnit   = ToolHtml.GetSubString(bidUnit, 150);
                    code      = ToolHtml.GetSubString(code, 50);
                    prjMgr    = ToolHtml.GetSubString(prjMgr, 50);
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = "深圳市宝安区新安街道办事处";
                    }

                    msgType  = "深圳市宝安区新安街道办事处";
                    specType = "建设工程";
                    // The bid type is fixed for this source; it is not derived from the title.
                    bidType  = "小型工程";
                    prjName  = ToolDb.GetPrjName(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                     bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }

        /// <summary>
        /// Flattens a table into text: even-indexed cells are followed by ":" and
        /// odd-indexed cells by a line break. A cell whose text is "工程类型" ends the
        /// processing of its row.
        /// </summary>
        /// <param name="table">The table to flatten.</param>
        /// <param name="keepCellLineBreaks">
        /// When true, "\n" inside a cell is expanded to "\r\n" before the final
        /// normalisation, so it survives as a line break instead of being removed.
        /// </param>
        /// <returns>The flattened text, always ending with "\r\n".</returns>
        private static string FlattenLabelValueTable(TableTag table, bool keepCellLineBreaks)
        {
            StringBuilder text = new StringBuilder();
            for (int k = 0; k < table.RowCount; k++)
            {
                TableRow row = table.Rows[k];
                for (int c = 0; c < row.ColumnCount; c++)
                {
                    string cell = row.Columns[c].ToPlainTextString().Replace("&nbsp;", "").Replace(" ", "");
                    if (keepCellLineBreaks)
                    {
                        cell = cell.Replace("\n", "\r\n");
                    }
                    if (cell == "工程类型")
                    {
                        break;
                    }
                    text.Append(cell).Append(c % 2 == 0 ? ":" : "\r\n");
                }
            }
            // Drops every "\n" and then turns each remaining "\r" back into "\r\n".
            return text.ToString().Replace("\n", "").Replace("\r\n\r\n", "\r\n").Replace("\r", "\r\n") + "\r\n";
        }

        /// <summary>
        /// Returns the first match of <paramref name="pattern"/> in <paramref name="text"/>
        /// with every given label and every ":" removed, trimmed. Labels are removed in the
        /// order given. Returns an empty string when nothing matches.
        /// </summary>
        private static string ExtractLabeledValue(Regex pattern, string text, params string[] labels)
        {
            string value = pattern.Match(text).Value;
            foreach (string label in labels)
            {
                value = value.Replace(label, "");
            }
            return value.Replace(":", "").Trim();
        }