Ejemplo n.º 1
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>. For every row of the
        /// <c>MoreInfoList1_DataGrid1</c> table it downloads the linked detail page and, when that
        /// page contains a <c>div</c> with id <c>ivs_content</c>, builds an <c>InviteInfo</c> record
        /// from it and registers the attachment links found in that block in <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected records
        /// reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.spprec.com";

            IList  list      = new List <InviteInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the page count cannot be read, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested with a form post that carries the hidden
                    // fields read from the most recently downloaded page.
                    string viewState = this.ToolWebSite.GetAspNetViewState(html);
                    string csrfToken = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "__CSRFTOKEN", "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" },
                        new string[] { csrfToken, viewState, "MoreInfoList1$Pager", i.ToString() });
                    try
                    {
                        cookiestr = cookiestr.GetReplace(new string[] { "path=/;", "HttpOnly", "," });
                        html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        // Skip a page that fails to download; the remaining pages are still attempted.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    // The cast can fail; skip the page instead of dereferencing null.
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; a shorter row cannot be a notice row.
                        continue;
                    }

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // A row without a link has no detail page to follow.
                        continue;
                    }

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = siteRoot + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch
                    {
                        // A detail page that cannot be downloaded only costs this one row.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ivs_content")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtlNode.AsHtml();
                    string inviteCtx = HtmlTxt.GetReplace("<br />,<br/>,<br>,</p>", "\r\n").ToCtxString();

                    // Cut the extracted unit name at the first "联系" / "地址" marker, if present.
                    string buildUnit = inviteCtx.GetBuildRegex();
                    if (buildUnit.Contains("联系"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }

                    string prjAddress = inviteCtx.GetAddressRegex();
                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();

                    string inviteType = "政府采购";
                    string specType   = inviteType;
                    string msgType    = "四川省公共资源交易中心";
                    string endDate    = string.Empty;
                    string remark     = string.Empty;
                    string otherType  = string.Empty;

                    InviteInfo info = ToolDb.GenInviteInfo("四川省", "四川省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail block.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag tag = aNode[k] as ATag;
                            if (tag == null || !tag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not contain "http" are prefixed with the site root.
                            string link = tag.Link.ToLower().Contains("http")
                                ? tag.Link
                                : siteRoot + tag.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>. For every <c>li</c> under
        /// <c>ul#news_list1</c> it downloads the linked detail page and, when that page contains a
        /// <c>div</c> with class <c>contents</c>, builds an <c>InviteInfo</c> record from it and
        /// registers the attachment links found in that block in <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected records
        /// reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "pages-list")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the page count cannot be read, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Plain concatenation: the URL contains no format placeholders, so wrapping it
                    // in string.Format added nothing.
                    string pageUrl = "http://www.gzjyfw.gov.cn/gcms/queryZjt_" + i + ".jspx?title=&businessCatalog=&businessType=JYGG&inDates=0&ext=&origin=ALL";
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(pageUrl);
                    }
                    catch
                    {
                        // Skip a page that fails to download; the remaining pages are still attempted.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("id", "news_list1")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // An item without a link has no detail page to follow.
                        continue;
                    }

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                    string area      = listNode[j].GetSpan().ToNodePlainString();
                    string InfoUrl   = aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch
                    {
                        // A detail page that cannot be downloaded only costs this one item.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "contents")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    // NOTE(review): if the literal passed to Replace below is an ordinary space, this
                    // also strips the spaces inside tags (e.g. between "a" and "href") before the
                    // markup is re-parsed for attachments — confirm the intended character.
                    string HtmlTxt   = System.Web.HttpUtility.HtmlDecode(dtlNode.AsHtml()).Replace(" ", "");
                    string inviteCtx = HtmlTxt.ToCtxString().Replace(" ", "");

                    string code       = inviteCtx.GetCodeRegex();
                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string specType   = "建设工程";
                    string inviteType = prjName.GetInviteBidType();
                    string msgType    = "贵州省住房和城乡建设厅";
                    string endDate    = string.Empty;
                    string remark     = string.Empty;
                    string otherType  = string.Empty;

                    // Cut the unit name right after the first organisation suffix it contains;
                    // anything from "招标代理" onwards is dropped entirely.
                    if (buildUnit.Contains("运输局"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("运输局")) + "运输局";
                    }
                    if (buildUnit.Contains("管理局"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("管理局")) + "管理局";
                    }
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("招标代理"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                    }

                    InviteInfo info = ToolDb.GenInviteInfo("贵州省", "贵州省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail block.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int a = 0; a < aNode.Count; a++)
                        {
                            ATag fileTag = aNode[a].GetATag();
                            if (fileTag == null || !fileTag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Case-insensitive scheme test so "HTTP://..." links are not prefixed
                            // with the site root a second time.
                            string link = fileTag.Link.ToLower().Contains("http")
                                ? fileTag.Link
                                : "http://www.gzjyfw.gov.cn/" + fileTag.Link;
                            base.AttachList.Add(ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, link));
                        }
                    }

                    // ">=" so that at most MaxCount records are returned (">" let one extra through).
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Crawls the paged result list at <c>SiteUrl</c>. Each list entry is a <c>table</c> with
        /// <c>height="23"</c> whose first row holds the link and date. The linked detail page either
        /// contains a table inside <c>div#content</c> (one <c>BidInfo</c> is built per data row, using
        /// the first row as column titles) or only the <c>div#content</c> block (one <c>BidInfo</c>
        /// is built from its text).
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected records
        /// reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <BidInfo>();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));

            if (nodeList != null && nodeList.Count > 0)
            {
                // Read the page count from the first "<digits>页" occurrence. When there is no
                // match (or the number does not fit an int) keep the default of one page instead
                // of throwing out of the crawl.
                Match pageMatch = new Regex(@"\d+页").Match(nodeList.AsString());
                int   parsedPage;
                if (pageMatch.Success && int.TryParse(pageMatch.Value.Trim(new char[] { '页' }), out parsedPage))
                {
                    page = parsedPage;
                }
            }

            // "<=" so that the last page (and the only page, when page == 1) is crawled too.
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&otype=&page=" + i.ToString()), Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip a page that fails to download; the remaining pages are still attempted.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("height", "23")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                // The search result does not change between rows, so compute it once per page.
                NodeList tables = tableNodeList.SearchFor(typeof(TableTag), true);
                for (int j = 0; j < tableNodeList.Count; j++)
                {
                    TableTag table = tables[j] as TableTag;
                    if (table == null || table.RowCount < 1)
                    {
                        continue;
                    }

                    // Only the first row of each entry table is read.
                    TableRow tr = table.Rows[0];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; a shorter row cannot be a list entry.
                        continue;
                    }

                    ATag   aTag    = tr.Columns[1].GetATag();
                    string onclick = aTag == null ? null : aTag.GetAttribute("onclick");
                    if (onclick == null)
                    {
                        // The detail URL is derived from the onclick handler; without it the
                        // entry cannot be followed.
                        continue;
                    }

                    string url = "http://www.yjjs.gov.cn/news_Info.asp?rs_id=" + onclick.Replace("titlelinks(", "");
                    int    cut = url.LastIndexOf("''");
                    if (cut < 0)
                    {
                        // Remove(-1) would throw; treat an unexpected handler format as "skip entry".
                        continue;
                    }

                    string tempName = aTag.LinkText.ToNodeString();
                    string tempDate = tr.Columns[2].ToNodePlainString().GetReplace(".", "-").GetDateRegex();
                    string InfoUrl  = url.Remove(cut).Replace(",", "").Replace("'", "").Replace("javascript:", "").Trim();

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // A detail page that cannot be downloaded only costs this one entry.
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")), true), new TagNameFilter("table")));
                    if (dtnode != null && dtnode.Count > 0)
                    {
                        TableTag dtlTable = dtnode[0] as TableTag;
                        if (dtlTable == null)
                        {
                            continue;
                        }

                        // Row 0 holds the column titles; every following row becomes one record.
                        for (int r = 1; r < dtlTable.RowCount; r++)
                        {
                            string code = string.Empty, endDate = string.Empty, otherType = string.Empty;
                            string bidCtx = string.Empty, HtmlTxt = string.Empty;

                            for (int c = 1; c < dtlTable.Rows[r].ColumnCount; c++)
                            {
                                try
                                {
                                    string temp  = dtlTable.Rows[r].Columns[c].ToNodePlainString();
                                    string title = dtlTable.Rows[0].Columns[c].ToNodePlainString();
                                    HtmlTxt += title + ":" + temp + "</br>";
                                    bidCtx  += title + ":" + temp + "\r\n";
                                }
                                catch
                                {
                                    // A data cell without a matching title cell is left out.
                                    continue;
                                }
                            }

                            string prjName   = bidCtx.GetRegex("工程项目名称,项目名称,工程名称", true, 200);
                            string buildUnit = bidCtx.GetRegex("建设单位");
                            string beginDate = bidCtx.GetRegex("中标日期");
                            string bidMoney  = bidCtx.GetMoneyRegex();
                            string bidUnit   = bidCtx.GetRegex("中标单位名称");
                            string prjMgr    = bidCtx.GetMgrRegex();
                            // NOTE(review): the address is extracted but not passed to GenBidInfo
                            // below — confirm whether that is intended.
                            string prjAddress = bidCtx.GetAddressRegex();
                            string bidType    = bidCtx.GetRegex("中标单位资质类别");

                            string msgType  = "阳江市建设工程交易中心";
                            string specType = "建设工程";

                            BidInfo info = ToolDb.GenBidInfo("广东省", "阳江市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                    else
                    {
                        // No table in the detail block: build a single record from the block's text.
                        parserdetail.Reset();
                        NodeList dtlNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            string code = string.Empty, endDate = string.Empty, otherType = string.Empty;

                            string HtmlTxt = dtlNode.AsHtml();
                            string bidCtx  = HtmlTxt.ToCtxString();

                            string buildUnit = bidCtx.GetBuildRegex();
                            string bidMoney  = bidCtx.GetMoneyRegex();
                            string bidUnit   = bidCtx.GetBidRegex();
                            string prjMgr    = bidCtx.GetMgrRegex();
                            // NOTE(review): the address is extracted but not passed to GenBidInfo
                            // below — confirm whether that is intended.
                            string prjAddress = bidCtx.GetAddressRegex();

                            string bidType = tempName.GetInviteBidType();

                            string msgType  = "阳江市建设工程交易中心";
                            string specType = "建设工程";

                            BidInfo info = ToolDb.GenBidInfo("广东省", "阳江市区", "", string.Empty, code, tempName, buildUnit, tempDate, bidUnit, tempDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>. For every <c>li</c> under the
        /// <c>ul</c> with class <c>m_m_c_list</c> it downloads the linked detail page and, when that
        /// page contains a <c>div</c> with class <c>zw_c_c_cont</c>, builds an <c>InviteInfo</c>
        /// record from it and registers the attachment links found in that block in
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected records
        /// reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results    = new List <InviteInfo>();
            int    totalPages = 1;
            string listHtml;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(results);
            }

            // The page count is taken from the second-to-last link of the paging form.
            Parser   parser    = new Parser(new Lexer(listHtml));
            NodeList pagerForm = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("name", "qPageForm")));
            if (!(pagerForm == null || pagerForm.Count == 0))
            {
                try
                {
                    Parser   pagerParser = new Parser(new Lexer(pagerForm.ToHtml()));
                    NodeList pagerLinks  = pagerParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (pagerLinks != null && pagerLinks.Count > 0)
                    {
                        string href   = pagerLinks[pagerLinks.Count - 2].GetATagHref();
                        string digits = href.Replace("turnOverPage", "").Replace("(", "").Replace(")", "").Replace(";", "");
                        totalPages = int.Parse(digits);
                    }
                }
                catch
                {
                    // Leave totalPages at 1 when the pager cannot be read.
                }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                if (pageNo != 1)
                {
                    // Pages after the first are fetched by posting the paging form fields.
                    string[] fieldNames  = new string[] { "channelCode", "pageIndex", "pageSize", "pointPageIndexId" };
                    string[] fieldValues = new string[] { "0005", pageNo.ToString(), "15", "1" };
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                    try
                    {
                        listHtml = this.ToolWebSite.GetHtmlByUrl("http://heyuan.gdgpo.com/queryMoreInfoList.do", form, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(listHtml));
                NodeList items = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "m_m_c_list")), true), new TagNameFilter("li")));
                if (items == null || items.Count == 0)
                {
                    continue;
                }

                for (int row = 0; row < items.Count; row++)
                {
                    // Values that this crawler never fills in but still hands to GenInviteInfo.
                    string endDate   = string.Empty;
                    string remark    = string.Empty;
                    string otherType = string.Empty;

                    ATag   link      = items[row].GetATag(1);
                    string prjName   = link.GetAttribute("title");
                    string beginDate = items[row].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = "http://heyuan.gdgpo.com" + link.Link;

                    string detailHtml;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList content = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zw_c_c_cont")));
                    if (content == null || content.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = content.AsHtml();
                    string inviteCtx = HtmlTxt.ToCtxString();
                    string code      = inviteCtx.GetCodeRegex().GetCodeDel();

                    // Keep only the part of the unit name before the first "、".
                    string buildUnit = inviteCtx.GetBuildRegex();
                    if (buildUnit.Contains("、"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("、"));
                    }

                    string prjAddress = inviteCtx.GetAddressRegex();
                    string inviteType = prjName.GetInviteBidType();
                    string msgType    = "河源市政府采购";
                    string specType   = "政府采购";

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "河源市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);

                    // Attachments are registered before the record itself is added to the result.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (!(anchors == null || anchors.Count == 0))
                    {
                        for (int n = 0; n < anchors.Count; n++)
                        {
                            ATag fileAtag = anchors[n].GetATag();
                            if (!fileAtag.IsAtagAttach())
                            {
                                continue;
                            }

                            string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                            string rawLink  = fileAtag.Link;
                            // Links that do not contain "http" are prefixed with the host below.
                            string fileLink = rawLink.ToLower().Contains("http")
                                ? rawLink
                                : "http://heyuan.gdgpo.gov.cn" + fileAtag.Link;
                            base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                        }
                    }

                    results.Add(info);
                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return(results);
                    }
                }
            }
            return(results);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Crawls the single list page at <c>SiteUrl</c>. For every <c>li</c> under
        /// <c>div#Body_div</c> it downloads the linked detail page and, when that page contains
        /// <c>table#mytable</c>, flattens the table into "label:value" lines and builds a
        /// <c>BidInfo</c> record from that text.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected records
        /// reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <BidInfo>();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).Replace("&nbsp;", "");
            }
            catch
            {
                // Without the list page there is nothing to crawl.
                return(list);
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "Body_div")), true), new TagNameFilter("li")));

            if (sNode != null && sNode.Count > 0)
            {
                for (int t = 0; t < sNode.Count; t++)
                {
                    string endDate = string.Empty, otherType = string.Empty,
                           prjMgr = string.Empty, area = string.Empty;

                    INode node = sNode[t];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // An item without a link has no detail page to follow.
                        continue;
                    }

                    // A missing title attribute yields an empty name instead of a null that the
                    // byte-count check below would throw on.
                    string prjName   = aTag.GetAttribute("title") ?? string.Empty;
                    string beginDate = node.ToPlainTextString().GetDateRegex();
                    string InfoUrl   = "http://www.tyjzsc.com.cn/" + aTag.Link.GetReplace("./");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // A detail page that cannot be downloaded only costs this one item.
                        continue;
                    }

                    // "th"/"TH" are rewritten to "td" before parsing, so the cells read below
                    // come from one uniform kind of column.
                    parser = new Parser(new Lexer(htmldtl.GetReplace("th,TH", "td")));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "mytable")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string HtmlTxt = dtlNode.AsHtml();

                        // Even-indexed cells are written as "label:", odd-indexed cells as the
                        // value followed by a line break.
                        StringBuilder ctxBuilder = new StringBuilder();
                        TableTag      table      = dtlNode[0] as TableTag;
                        if (table != null)
                        {
                            for (int r = 0; r < table.RowCount; r++)
                            {
                                TableRow row = table.Rows[r];
                                for (int c = 0; c < row.ColumnCount; c++)
                                {
                                    string cell = row.Columns[c].ToNodePlainString().GetReplace(":,:");
                                    ctxBuilder.Append(cell).Append(c % 2 == 0 ? ":" : "\r\n");
                                }
                            }
                        }
                        string bidCtx = ctxBuilder.ToString();

                        string buildUnit = bidCtx.GetBuildRegex();
                        // NOTE(review): the address is extracted but not passed to GenBidInfo
                        // below — confirm whether that is intended.
                        string prjAddress = bidCtx.GetAddressRegex();
                        string code       = bidCtx.GetCodeRegex();
                        if (string.IsNullOrEmpty(code))
                        {
                            code = bidCtx.GetRegex("工程编码", true, 50);
                        }

                        string bidUnit  = bidCtx.GetBidRegex();
                        string bidMoney = bidCtx.GetMoneyRegex();
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = bidCtx.GetMoneyRegex(new string[] { "投资总额" });
                        }

                        // More than 200 bytes does not guarantee more than 100 characters when the
                        // default encoding uses over two bytes per character (e.g. UTF-8), so the
                        // length is checked as well before cutting to 100 characters.
                        if (Encoding.Default.GetByteCount(prjName) > 200 && prjName.Length > 100)
                        {
                            prjName = prjName.Substring(0, 100);
                        }

                        string msgType  = "太原市建设工程交易中心";
                        string specType = "建设工程";
                        string bidType  = prjName.GetInviteBidType();

                        BidInfo info = ToolDb.GenBidInfo("山西省", "山西省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                        list.Add(info);
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Crawls the paged listing at http://www.jxsggzy.cn/web/jyxx/002005/002005004/{page}.html,
        /// opens the detail page of every list entry and converts each detail page that contains a
        /// <c>div</c> with class <c>article-info</c> into a <c>BidInfo</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as the result list holds <c>MaxCount</c> items.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the initial request to <c>SiteUrl</c> throws.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<BidInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }

            // Page count: the text between "/" and "\r" in the pager items is parsed as the number of pages.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "wb-page-li")));
            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "\r");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                // NOTE(review): assigning SiteUrl replaces the start URL held by this instance
                // with the current page URL - confirm this is intended.
                SiteUrl = "http://www.jxsggzy.cn/web/jyxx/002005/002005004/" + i + ".html";
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
                }
                catch { continue; } // skip pages that cannot be downloaded

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "ewb-list-node clearfix")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    string prjName = string.Empty,
                           buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty,
                           specType = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty,
                           HtmlTxt = string.Empty, area = string.Empty;

                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // List entry without an anchor: there is no detail page to follow.
                        continue;
                    }

                    prjName = aTag.GetAttribute("title");
                    if (string.IsNullOrWhiteSpace(prjName))
                    {
                        prjName = aTag.LinkText;
                    }
                    beginDate = listNode[j].ToPlainTextString().GetDateRegex();

                    // When the third character of the title is 县/区/市, the first three characters
                    // are used as the area. The length check prevents an IndexOutOfRangeException
                    // on titles shorter than three characters.
                    if (!string.IsNullOrEmpty(prjName) && prjName.Length > 2 &&
                        (prjName[2].Equals('县') || prjName[2].Equals('区') || prjName[2].Equals('市')))
                    {
                        area = prjName.Substring(0, 3);
                    }

                    InfoUrl = "http://www.jxsggzy.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; } // skip entries whose detail page cannot be downloaded

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-info")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    bidCtx  = HtmlTxt.ToCtxString();

                    // Prefer the first table with cellpadding="0" inside the article, when there is one.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList dtlBidNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "0")));
                    TableTag bidTable   = null;
                    if (dtlBidNode != null && dtlBidNode.Count > 0)
                    {
                        bidTable = dtlBidNode[0] as TableTag;
                    }

                    if (bidTable != null)
                    {
                        // Flatten the table into "label:value\r\n" lines: odd-positioned non-empty
                        // cells are followed by ":", even-positioned ones by a line break.
                        string ctx = string.Empty;
                        for (int r = 0; r < bidTable.RowCount; r++)
                        {
                            for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                            {
                                string temp = bidTable.Rows[r].Columns[c].ToNodePlainString();
                                if (string.IsNullOrEmpty(temp))
                                {
                                    continue;
                                }
                                if ((c + 1) % 2 == 0)
                                {
                                    ctx += temp + "\r\n";
                                }
                                else
                                {
                                    ctx += temp + ":";
                                }
                            }
                        }
                        buildUnit = ctx.GetBuildRegex();
                        bidUnit   = ctx.GetBidRegex(new string[] { "第一中标排序单位名称" });
                        bidMoney  = ctx.GetMoneyRegex();
                        prjMgr    = ctx.GetMgrRegex(new string[] { "建造师姓名" });
                        code      = ctx.GetCodeRegex();
                        bidCtx    = ctx;
                    }
                    else
                    {
                        // No usable table: extract the fields from the plain text of the article.
                        buildUnit = bidCtx.GetBuildRegex();
                        bidUnit   = bidCtx.GetBidRegex();
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = bidCtx.GetRegex("第一中标排序人");
                        }
                        bidMoney = bidCtx.GetMoneyRegex();
                        prjMgr   = bidCtx.GetMgrRegex();
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = bidCtx.GetRegex("注册监理工程师");
                        }
                        code = bidCtx.GetCodeRegex();
                    }

                    buildUnit = buildUnit.Replace(" ", "");
                    bidUnit   = bidUnit.Replace(" ", "");
                    code      = code.Replace(" ", "");
                    prjMgr    = prjMgr.Replace(" ", "");
                    bidType   = "重点工程";
                    specType  = "政府采购";
                    msgType   = "江西省公共资源交易中心";
                    BidInfo info = ToolDb.GenBidInfo("江西省", "江西省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Crawls the announcement listing reachable from <c>SiteUrl</c> (further pages are fetched
        /// from lhxq.szzfcg.cn <c>topicView.do</c>), opens the detail page of every list entry and
        /// converts each one into an <c>InviteInfo</c>. Anchors inside the detail body that look
        /// like downloads are registered in <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as the result list holds <c>MaxCount</c> items.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the initial request to <c>SiteUrl</c> throws.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "clearfix")), true), new TagNameFilter("a")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The page count is read from the onclick attribute of the last pager anchor:
                    // the text between the first "(" and the following "," is parsed as a number.
                    string temp = sNode[sNode.Count - 1].GetATag().GetAttribute("onclick").Replace("(", "kdxx").Replace(",", "xxdk");
                    pageInt = int.Parse(temp.GetRegexBegEnd("kdxx", "xxdk"));
                }
                catch { pageInt = 1; } // best effort: fall back to a single page
            }

            // Built once instead of once per list entry; extracts the "id=..." part of a link up to the first "-".
            Regex regexLink = new Regex(@"id=[^-]+");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://lhxq.szzfcg.cn/portal/topicView.do?method=view1&id=500100201&siteId=11&tstmp=15%3A25%3A51%20GMT%2B0800&page=" + i, Encoding.UTF8);
                    }
                    catch { continue; } // skip pages that cannot be downloaded
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("li"));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                // NOTE(review): the last <li> is never processed - presumably it is not an
                // announcement entry; confirm against the page markup.
                for (int j = 0; j < listNode.Count - 1; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    beginDate = listNode[j].ToNodePlainString().GetDateRegex("yyyy/MM/dd");
                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // Every <li> of the page is scanned, so items without an anchor are skipped
                        // instead of failing the whole crawl with a NullReferenceException.
                        continue;
                    }

                    prjName = aTag.GetAttribute("title");
                    // Regex.Match throws on a null input, so a missing link is treated as empty.
                    string id = regexLink.Match(aTag.Link ?? string.Empty).Value;
                    InfoUrl = "http://lhxq.szzfcg.cn/portal/documentView.do?method=view&" + id;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; } // skip entries whose detail page cannot be downloaded

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt    = dtlNode.AsHtml();
                    inviteCtx  = HtmlTxt.ToCtxString();
                    prjAddress = inviteCtx.GetAddressRegex();
                    buildUnit  = inviteCtx.GetBuildRegex();
                    code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    msgType    = "深圳市龙华新区公共资源交易中心";
                    specType   = "政府采购";
                    inviteType = "货物";
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳政府采购", "龙华新区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);

                    list.Add(info);

                    // Register every anchor of the detail body that is an attachment or whose link contains "down".
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aTagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aTagNode != null && aTagNode.Count > 0)
                    {
                        for (int k = 0; k < aTagNode.Count; k++)
                        {
                            ATag aFile = aTagNode[k].GetATag();
                            if (aFile == null)
                            {
                                continue;
                            }

                            string fileLink = aFile.Link ?? string.Empty;
                            if (aFile.IsAtagAttach() || fileLink.ToLower().Contains("down"))
                            {
                                // Links that do not contain "http" are treated as relative to the site root.
                                string link = fileLink.Contains("http")
                                    ? fileLink
                                    : "http://lhxq.szzfcg.cn/" + fileLink;
                                BaseAttach attach = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Crawls the listing at <c>SiteUrl</c> (further pages via the <c>&amp;page=</c> query
        /// parameter), opens the detail page of every entry under www.xjztb.net/Homepage/ and
        /// converts each detail page that contains the element with id
        /// <c>ctl00_ContentPlaceHolder1_Panel3</c> into a <c>BidInfo</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as the result list holds <c>MaxCount</c> items.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the initial request to <c>SiteUrl</c> throws.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first page there is nothing to parse; return the empty list
                // right away, like the sibling crawlers do.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_Repeater1_ctl16_lblpc")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString();
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the span text is not a number
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // The page parameter sent for the i-th page is i - 1.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + (i - 1).ToString(), Encoding.Default);
                    }
                    catch { continue; } // skip pages that cannot be downloaded
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "slist")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    string prjName = string.Empty,
                           buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty,
                           specType = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty,
                           HtmlTxt = string.Empty, area = string.Empty;

                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // List entry without an anchor: there is no detail page to follow.
                        continue;
                    }

                    prjName = aTag.GetAttribute("title");
                    // NOTE(review): the current year is prefixed to the span text - presumably the list
                    // shows only month and day, so entries from a previous year get the wrong year; confirm.
                    beginDate = DateTime.Now.Year + "-" + node.GetSpan().StringText.ToNodeString().GetReplace(" ");
                    // The area is the text between the first pair of square/lenticular brackets of the entry.
                    area    = node.ToNodePlainString().GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    InfoUrl = "http://www.xjztb.net/Homepage/" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; } // skip entries whose detail page cannot be downloaded

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_Panel3")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    parser  = new Parser(new Lexer(HtmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    TableTag table     = null;
                    if (tableNode != null && tableNode.Count > 0)
                    {
                        // The last table of the panel is the one that gets flattened.
                        table = tableNode[tableNode.Count - 1] as TableTag;
                    }

                    if (table != null)
                    {
                        // Flatten the table into "label:value\r\n" lines. Cells in even positions end a
                        // line; so do cells mentioning 工程师 or 注册证号; every other cell is a label.
                        bidCtx = string.Empty;
                        for (int r = 0; r < table.RowCount; r++)
                        {
                            for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                            {
                                string temp = table.Rows[r].Columns[c].ToNodePlainString();

                                if ((c + 1) % 2 == 0)
                                {
                                    bidCtx += temp + "\r\n";
                                }
                                else if (temp.Contains("工程师") || temp.Contains("注册证号"))
                                {
                                    bidCtx += temp + "\r\n";
                                }
                                else
                                {
                                    bidCtx += temp.GetReplace(":,:") + ":";
                                }
                            }
                        }
                    }
                    else
                    {
                        // No usable table: fall back to the plain text of the panel.
                        bidCtx = HtmlTxt.ToCtxString();
                    }

                    buildUnit = bidCtx.GetBuildRegex();
                    code      = bidCtx.GetCodeRegex().GetCodeDel();

                    bidUnit = bidCtx.GetBidRegex();
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegex("单位名称");
                    }
                    bidMoney = bidCtx.GetMoneyRegex();
                    prjMgr   = bidCtx.GetMgrRegex();
                    if (string.IsNullOrEmpty(prjMgr))
                    {
                        prjMgr = bidCtx.GetRegex("总施工工程师");
                    }
                    msgType  = "新疆维吾尔自治区建设工程招标投标监督管理办公室";
                    specType = "建设工程";
                    bidType  = "建设工程";
                    BidInfo info = ToolDb.GenBidInfo("新疆维吾尔自治区", "新疆维吾尔自治区及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Crawls the paged listing at http://www.jxsggzy.cn/web/jyxx/002001/002001001/{page}.html,
        /// opens the detail page of every list entry and converts each detail page that contains a
        /// <c>div</c> with class <c>article-info</c> into an <c>InviteInfo</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as the result list holds <c>MaxCount</c> items.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the initial request to <c>SiteUrl</c> throws.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<InviteInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }

            // Page count: the text between "/" and "\r" in the pager items is parsed as the number of pages.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "wb-page-li")));
            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "\r");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                // NOTE(review): assigning SiteUrl replaces the start URL held by this instance
                // with the current page URL - confirm this is intended.
                SiteUrl = "http://www.jxsggzy.cn/web/jyxx/002001/002001001/" + i + ".html";
                try
                {
                    html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
                }
                catch { continue; } // skip pages that cannot be downloaded

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "ewb-list-node clearfix")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty,
                           HtmlTxt = string.Empty, area = string.Empty;

                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // List entry without an anchor: there is no detail page to follow.
                        continue;
                    }

                    prjName = aTag.GetAttribute("title");
                    if (string.IsNullOrWhiteSpace(prjName))
                    {
                        prjName = aTag.LinkText;
                    }
                    beginDate = listNode[j].ToPlainTextString().GetDateRegex();

                    // When the third character of the title is 县/区/市, the first three characters
                    // are used as the area. The length check prevents an IndexOutOfRangeException
                    // on titles shorter than three characters.
                    if (!string.IsNullOrEmpty(prjName) && prjName.Length > 2 &&
                        (prjName[2].Equals('县') || prjName[2].Equals('区') || prjName[2].Equals('市')))
                    {
                        area = prjName.Substring(0, 3);
                    }

                    InfoUrl = "http://www.jxsggzy.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = ToolWeb.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; } // skip entries whose detail page cannot be downloaded

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-info")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt   = dtlNode.AsHtml();
                    inviteCtx = HtmlTxt.ToCtxString();

                    // Each field is extracted with the generic helper first and with a
                    // site-specific pattern when the generic helper finds nothing.
                    prjAddress = inviteCtx.GetAddressRegex();
                    if (string.IsNullOrWhiteSpace(prjAddress))
                    {
                        prjAddress = inviteCtx.GetRegexBegEnd("地址", "项目");
                    }

                    buildUnit = inviteCtx.GetBuildRegex();
                    if (string.IsNullOrWhiteSpace(buildUnit))
                    {
                        buildUnit = inviteCtx.GetRegex("招标单位名称");
                    }

                    code = inviteCtx.GetCodeRegex();
                    if (string.IsNullOrWhiteSpace(code))
                    {
                        code = inviteCtx.GetRegexBegEnd("项目编号:", "招标");
                    }

                    inviteType = "房建及市政";
                    specType   = "政府采购";
                    msgType    = "江西省公共资源交易中心";
                    InviteInfo info = ToolDb.GenInviteInfo("江西省", "江西省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 10
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <BidInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return(list); }
            Parser parser = new Parser(new Lexer(html));

            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoListGG_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT"
                    }, new string[] {
                        viewState, "MoreInfoListGG$Pager", i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoListGG_DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               bidDate = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjAddress = string.Empty, remark = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        string xian = aTag.LinkText.GetRegexBegEnd("【", "】");
                        prjName   = aTag.GetAttribute("title");
                        beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.zjbid.cn" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtnode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "infodetail")));
                        if (dtnode != null && dtnode.Count > 0)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            bidCtx  = HtmlTxt.GetReplace("</p>,</br>", "\r\n").GetReplace("<br />", "\r\n").ToCtxString();
                            bidUnit = bidCtx.GetBidRegex();
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegex("中标人");
                            }
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegex("预中标单位(第一名)");
                            }
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                try
                                {
                                    parser = new Parser(new Lexer(htmldtl));
                                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "_Sheet1")));
                                    if (dtlNode != null && dtlNode.Count > 0)
                                    {
                                        TableTag dtlTable = dtlNode[0] as TableTag;
                                        HtmlTxt = dtlTable.ToHtml();
                                        string ctx = "";
                                        for (int r = 1; r < dtlTable.RowCount; r++)
                                        {
                                            for (int c = 0; c < dtlTable.Rows[r].ColumnCount; c++)
                                            {
                                                string temp = dtlTable.Rows[r].Columns[c].ToHtml().GetReplace("<br>,<br/>", "\r\n").ToCtxString();
                                                if (!temp.Contains("\r\n"))
                                                {
                                                    temp = dtlTable.Rows[r].Columns[c].ToNodePlainString();
                                                }
                                                if (!IsTable(dtlTable.Rows[r].ToHtml()))
                                                {
                                                    if ((c + 1) % 2 == 0)
                                                    {
                                                        ctx += temp + "\r\n";
                                                    }
                                                    else
                                                    {
                                                        ctx += temp.GetReplace(":,:") + ":";
                                                    }
                                                }
                                                else
                                                {
                                                    ctx += GetTableBid(dtlTable.Rows[r].ToHtml());
                                                }
                                            }
                                        }
                                        ctx  = ctx.GetReplace(":\r\n", ":");
                                        code = ctx.GetCodeRegex();
                                        if (string.IsNullOrWhiteSpace(code))
                                        {
                                            code = ctx.GetRegex("工程编码");
                                        }
                                        buildUnit = ctx.GetBuildRegex();
                                        if (string.IsNullOrEmpty(buildUnit))
                                        {
                                            buildUnit = ctx.GetRegex("建设单位");
                                        }
                                        if (string.IsNullOrWhiteSpace(buildUnit))
                                        {
                                            buildUnit = ctx.GetRegex("采购人名称");
                                        }
                                    }
                                }
                                catch { }
                            }
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                try
                                {
                                    parser = new Parser(new Lexer(htmldtl));
                                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "_Sheet1_6_1")));
                                    if (dtlNode != null && dtlNode.Count > 0)
                                    {
                                        TableTag dtlTable = dtlNode[0] as TableTag;
                                        string   Html     = dtlTable.ToHtml();
                                        string   bidCtxt  = string.Empty;
                                        for (int c = 0; c < dtlTable.Rows[0].ColumnCount; c++)
                                        {
                                            bidCtxt += dtlTable.Rows[1].Columns[c].ToNodePlainString() + ":";
                                            bidCtxt += dtlTable.Rows[2].Columns[c].ToNodePlainString() + "\r\n";
                                        }

                                        bidCtxt = bidCtxt.GetReplace(":\r\n", ":");
                                        bidCtxt = bidCtxt.Replace("%", "");
                                        bidUnit = bidCtxt.GetBidRegex();
                                        if (string.IsNullOrEmpty(bidUnit))
                                        {
                                            bidUnit = bidCtxt.GetRegex("拟中标单位");
                                        }
                                        bidMoney = bidCtxt.GetMoneyRegex();
                                        if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                        {
                                            bidMoney = bidCtxt.GetRegex("中标价:").GetMoney();
                                        }
                                        prjMgr = bidCtxt.GetMgrRegex();
                                        if (string.IsNullOrWhiteSpace(prjMgr))
                                        {
                                            prjMgr = bidCtxt.GetRegex("项目经理");
                                        }
                                    }
                                }
                                catch { }
                            }

                            if (string.IsNullOrWhiteSpace(bidMoney))
                            {
                                bidMoney = bidCtx.GetMoneyRegex();
                            }
                            if (string.IsNullOrWhiteSpace(bidMoney))
                            {
                                try
                                {
                                    parser = new Parser(new Lexer(htmldtl));
                                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "_Sheet1_13_0")));
                                    if (dtlNode != null && dtlNode.Count > 0)
                                    {
                                        TableTag dtlTable = dtlNode[0] as TableTag;
                                        string   Html     = dtlTable.ToHtml();
                                        string   bidCtxt  = string.Empty;
                                        for (int c = 0; c < dtlTable.Rows[0].ColumnCount; c++)
                                        {
                                            bidCtxt += dtlTable.Rows[1].Columns[c].ToNodePlainString() + ":";
                                            bidCtxt += dtlTable.Rows[2].Columns[c].ToNodePlainString() + "\r\n";
                                        }
                                        if (string.IsNullOrWhiteSpace(bidUnit))
                                        {
                                            bidUnit = bidCtxt.GetRegex("中标供应商");
                                        }
                                        if (string.IsNullOrWhiteSpace(bidMoney))
                                        {
                                            bidMoney = bidCtxt.GetRegex("价格(元)");
                                        }
                                    }
                                }
                                catch { }
                            }
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList node = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "_Sheet1_13_0")));
                                if (node != null && node.Count > 0)
                                {
                                    TableTag bidTable = node[0] as TableTag;
                                    string   ctx      = string.Empty;
                                    if (bidTable.RowCount >= 3)
                                    {
                                        for (int r = 0; r < bidTable.Rows[1].ColumnCount; r++)
                                        {
                                            try
                                            {
                                                ctx += bidTable.Rows[1].Columns[r].ToNodePlainString() + ":";
                                                ctx += bidTable.Rows[2].Columns[r].ToNodePlainString() + "\r\n";
                                            }
                                            catch { }
                                        }
                                        bidUnit  = ctx.GetBidRegex();
                                        bidMoney = ctx.GetMoneyRegex(null, false, "万元");
                                    }
                                }
                            }

                            if (string.IsNullOrWhiteSpace(buildUnit))
                            {
                                buildUnit = bidCtx.GetBuildRegex();
                            }
                            if (string.IsNullOrWhiteSpace(buildUnit))
                            {
                                buildUnit = bidCtx.GetRegex("招标人");
                            }
                            if (string.IsNullOrWhiteSpace(buildUnit))
                            {
                                buildUnit = bidCtx.GetRegex("采购人名称");
                            }
                            if (string.IsNullOrWhiteSpace(code))
                            {
                                code = bidCtx.GetCodeRegex().GetCodeDel();
                            }
                            if (!string.IsNullOrWhiteSpace(code))
                            {
                                if (code[code.Length - 1] != '号')
                                {
                                    code = "";
                                }
                            }
                            if (string.IsNullOrWhiteSpace(code))
                            {
                                code = bidCtx.GetRegex("采购项目编号");
                            }

                            if (bidUnit.Contains("公司"))
                            {
                                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                            }
                            if (buildUnit.Contains("开标"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("开标"));
                            }
                            try
                            {
                                if (Convert.ToDecimal(bidMoney) > 100000)
                                {
                                    bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                }
                            }
                            catch { }

                            msgType  = "浙江省招标投标办公室";
                            specType = "建设工程";
                            bidType  = ToolHtml.GetInviteTypes(prjName);

                            BidInfo info = ToolDb.GenBidInfo("浙江省", "浙江省及地市", xian, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Crawls the listing that starts at <c>SiteUrl</c> page by page, downloads the detail
        /// page of every listed entry and converts it into an <c>InviteInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the result holds at least
        /// <c>MaxCount</c> records; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodePage = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "r_con")), true), new TagNameFilter("a")));

            if (nodePage != null && nodePage.Count > 0)
            {
                try
                {
                    // The page count is the first run of digits in the href of the last
                    // anchor found under the div with class "r_con".
                    string lastHref = nodePage[nodePage.Count - 1].GetATagHref().Replace("&#39;", "");
                    pageInt = int.Parse(Regex.Match(lastHref, @"[0-9]+").Value);
                }
                catch { } // best effort: on any failure keep the default of a single page
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 was already downloaded above; later pages come from the search URL.
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://61.144.227.212/was5/web/search?page=" + i + "&channelid=235507&token=64.1504521027694.76&perpage=15&outlinepage=10&templet=jw_list.jsp", Encoding.UTF8);
                    }
                    catch { continue; } // skip a page that fails to download
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zx_ml_list zx_ml_list_right")), true), new TagNameFilter("li")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    // NOTE(review): iteration starts at index 1, so the first matched <li> is
                    // never processed — presumably a header entry; confirm.
                    for (int j = 1; j < nodeList.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        ATag aTag = nodeList[j].GetATag();
                        if (aTag == null)
                        {
                            // An entry without a link cannot be followed; skip it instead of
                            // failing the whole crawl with a NullReferenceException.
                            continue;
                        }
                        prjName = aTag.GetAttribute("title");
                        endDate = nodeList[j].ToPlainTextString().GetDateRegex();
                        if (prjName.Contains("]"))
                        {
                            // Keep only the text after the last ']'.
                            int len = prjName.LastIndexOf("]");
                            prjName = prjName.Substring(len + 1, prjName.Length - len - 1);
                        }
                        InfoUrl = "http://61.144.227.212/was5/web/" + aTag.Link.Replace("./", "");
                        string htmlDtl = string.Empty;
                        try
                        {
                            htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; } // skip an entry whose detail page fails to download

                        parser = new Parser(new Lexer(htmlDtl));
                        NodeList nodeDtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zx_xxgk_cont")));
                        if (nodeDtl != null && nodeDtl.Count > 0)
                        {
                            HtmlTxt   = nodeDtl.AsHtml();
                            inviteCtx = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();

                            // Re-scan the same detail page for the div with class "tit", which supplies the begin date.
                            parser.Reset();
                            NodeList dateNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tit")));
                            if (dateNode != null && dateNode.Count > 0)
                            {
                                beginDate = dateNode.AsString().GetDateRegex();
                            }

                            buildUnit  = inviteCtx.GetBuildRegex();
                            code       = inviteCtx.GetCodeRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            inviteType = prjName.GetInviteBidType();
                            specType   = "政府采购";
                            msgType    = "深圳市交通运输委员会";
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                // Fall back to the message type when no build unit was extracted.
                                buildUnit = msgType;
                            }
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳政府采购", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Crawls the listing that starts at <c>SiteUrl</c> page by page, downloads the detail
        /// page of every listed entry, converts it into an <c>InviteInfo</c> record and registers
        /// the attachment links found in the detail content in <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the result holds at least
        /// <c>MaxCount</c> records; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pageConent")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "共" and "页" in the pager div.
                    string temp = pageNode.AsString().GetRegexBegEnd("共", "页");
                    pageInt = int.Parse(temp.GetReplace("&nbsp;"));
                }
                catch { } // best effort: on any failure keep the default of a single page
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 was already downloaded above; later pages are "<i>.html".
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.scjst.gov.cn/main/034/034002/" + i + ".html");
                    }
                    catch { continue; } // skip a page that fails to download
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "info_panel")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    // NOTE(review): the last matched <li> is never processed — presumably a
                    // non-entry item such as a pager; confirm.
                    for (int j = 0; j < listNode.Count - 1; j++)
                    {
                        INode  node = listNode[j];
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        ATag aTag = node.GetATag();
                        if (aTag == null)
                        {
                            // An entry without a link cannot be followed; skip it instead of
                            // failing the whole crawl with a NullReferenceException.
                            continue;
                        }
                        prjName = aTag.GetAttribute("title");
                        InfoUrl = "http://www.scjst.gov.cn/main/034/034002/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        }
                        catch { continue; } // skip an entry whose detail page fails to download

                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detailcon")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.GetReplace("</span>,<br/>,<br>", "\r\n").ToCtxString();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();

                            // The begin date is searched in the text starting at "发布日期",
                            // or at "发布时间" when the former is absent.
                            int dateIdx = inviteCtx.IndexOf("发布日期");
                            if (dateIdx == -1)
                            {
                                dateIdx = inviteCtx.IndexOf("发布时间");
                            }
                            if (dateIdx != -1)
                            {
                                beginDate = inviteCtx.Substring(dateIdx).GetDateRegex();
                            }
                            if (string.IsNullOrEmpty(beginDate))
                            {
                                // No date found on the page: fall back to today's date.
                                beginDate = DateTime.Now.ToString("yyyy-MM-dd");
                            }
                            inviteType = prjName.GetInviteBidType();
                            specType   = "建设工程";
                            msgType    = "四川省住房和城乡建设厅";
                            InviteInfo info = ToolDb.GenInviteInfo("四川省", "四川省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);

                            // Collect attachment links from the detail content and tie them to the new record.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag tag = aNode[k] as ATag;
                                    // The "as" cast yields null for a node that is not an ATag; skip those.
                                    if (tag != null && tag.IsAtagAttach())
                                    {
                                        // Links without "http" are prefixed with the site root.
                                        string link = string.Empty;
                                        if (tag.Link.ToLower().Contains("http"))
                                        {
                                            link = tag.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.scjst.gov.cn/" + tag.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 13
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <BidInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "mtop pages")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("1/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.gsei.com.cn/index.php/cms/item-list-category-1337-page-" + i + ".shtml", Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "label_ul_b")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        ATag aTag = listNode[j].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               bidDate = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjAddress = string.Empty, remark = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        prjName   = aTag.GetAttribute("title");
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                        InfoUrl   = aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "p8_content_show")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.ToLower().GetReplace("</p>,<br/>", "\r\n").ToCtxString();
                            bidUnit = bidCtx.GetBidRegex();
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegex("第一成交候选人,第一名,中标人为,中标单位名称");
                            }
                            bidMoney = bidCtx.GetMoneyRegex(null, false, "万元");
                            if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = bidCtx.GetMoneyRegex(new string[] { "中标造价" }, false, "万元");
                            }
                            prjMgr    = bidCtx.GetMgrRegex();
                            buildUnit = bidCtx.GetBuildRegex();
                            code      = bidCtx.GetCodeRegex().GetCodeDel();
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                                if (tableNode != null && tableNode.Count > 0)
                                {
                                    TableTag tag = tableNode[0] as TableTag;
                                    string   ctx = string.Empty;
                                    for (int r = 0; r < tag.RowCount; r++)
                                    {
                                        string rowName = tag.Rows[r].ToNodePlainString();
                                        if (rowName.Contains("中标候选人名称") || rowName.Contains("中标价"))
                                        {
                                            for (int c = 0; c < 7; c++)
                                            {
                                                try
                                                {
                                                    if (c < 3)
                                                    {
                                                        ctx += tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:") + ":";
                                                    }
                                                    else
                                                    {
                                                        ctx += tag.Rows[r + 1].Columns[c - 3].ToNodePlainString().GetReplace(":,:") + ":";
                                                    }

                                                    ctx += tag.Rows[r + 2].Columns[c].ToNodePlainString().GetReplace(":,:") + "\r\n";
                                                }
                                                catch { }
                                            }
                                        }
                                        else
                                        {
                                            for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                            {
                                                string temp = tag.Rows[r].Columns[c].ToNodePlainString();

                                                if ((c + 1) % 2 == 0)
                                                {
                                                    ctx += temp.GetReplace(":,:") + "\r\n";
                                                }
                                                else
                                                {
                                                    ctx += temp.GetReplace(":,:") + ":";
                                                }
                                            }
                                        }
                                        if (rowName.Contains("中标候选人名称") || rowName.Contains("中标价"))
                                        {
                                            break;
                                        }
                                    }
                                    bidUnit = ctx.GetBidRegex().GetReplace("第一名,第二名,第三名,名次");
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        bidUnit = ctx.GetRegex("中标候选人名称");
                                    }
                                    if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                    {
                                        bidMoney = ctx.GetMoneyRegex();
                                    }
                                    if (string.IsNullOrEmpty(buildUnit))
                                    {
                                        buildUnit = ctx.GetBuildRegex();
                                    }
                                    if (string.IsNullOrEmpty(prjMgr) || prjMgr.IsNumber())
                                    {
                                        prjMgr = ctx.GetMgrRegex();
                                    }
                                    if (string.IsNullOrEmpty(code))
                                    {
                                        code = ctx.GetCodeRegex().GetCodeDel();
                                    }

                                    if (string.IsNullOrEmpty(bidUnit) || bidUnit.IsNumber())
                                    {
                                        ctx = string.Empty;
                                        for (int r = 0; r < tag.RowCount; r++)
                                        {
                                            for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                            {
                                                string temp = tag.Rows[r].Columns[c].ToNodePlainString();

                                                if ((c + 1) % 2 == 0)
                                                {
                                                    ctx += temp.GetReplace(":,:") + "\r\n";
                                                }
                                                else
                                                {
                                                    ctx += temp.GetReplace(":,:") + ":";
                                                }
                                            }
                                        }
                                        bidUnit = ctx.GetBidRegex().GetReplace("第一名,第二名,第三名,名次");
                                        if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                        {
                                            bidMoney = ctx.GetMoneyRegex();
                                        }
                                        if (string.IsNullOrEmpty(buildUnit))
                                        {
                                            buildUnit = ctx.GetBuildRegex();
                                        }
                                        if (string.IsNullOrEmpty(prjMgr) || prjMgr.IsNumber())
                                        {
                                            prjMgr = ctx.GetMgrRegex();
                                        }
                                        if (string.IsNullOrEmpty(code))
                                        {
                                            code = ctx.GetCodeRegex().GetCodeDel();
                                        }

                                        if (string.IsNullOrEmpty(bidUnit) || bidUnit.IsNumber())
                                        {
                                            ctx = string.Empty;
                                            for (int c = 0; c < tag.Rows[0].ColumnCount; c++)
                                            {
                                                try
                                                {
                                                    ctx += tag.Rows[0].Columns[c].ToNodePlainString().GetReplace(":,:") + ":";
                                                    ctx += tag.Rows[1].Columns[c].ToNodePlainString().GetReplace(":,:") + "\r\n";
                                                }
                                                catch { }
                                            }
                                            bidUnit = ctx.GetBidRegex().GetReplace("第一名,第二名,第三名,名次");
                                            if (string.IsNullOrEmpty(bidUnit))
                                            {
                                                bidUnit = ctx.GetRegex("中标候选人名称");
                                            }
                                            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                            {
                                                bidMoney = ctx.GetMoneyRegex();
                                            }
                                            if (string.IsNullOrEmpty(buildUnit))
                                            {
                                                buildUnit = ctx.GetBuildRegex();
                                            }
                                            if (string.IsNullOrEmpty(prjMgr) || prjMgr.IsNumber())
                                            {
                                                prjMgr = ctx.GetMgrRegex();
                                            }
                                            if (string.IsNullOrEmpty(code))
                                            {
                                                code = ctx.GetCodeRegex().GetCodeDel();
                                            }
                                        }
                                        if (string.IsNullOrEmpty(bidUnit) || bidUnit.IsNumber())
                                        {
                                            for (int r = 0; r < tag.RowCount; r++)
                                            {
                                                string rowName = tag.Rows[r].ToNodePlainString();
                                                if (rowName.Contains("中标候选人名称") || rowName.Contains("中标价"))
                                                {
                                                    for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                                    {
                                                        try
                                                        {
                                                            ctx += tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:") + ":";
                                                            ctx += tag.Rows[r + 1].Columns[c].ToNodePlainString().GetReplace(":,:") + "\r\n";
                                                        }
                                                        catch { }
                                                    }
                                                }
                                                else
                                                {
                                                    for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                                    {
                                                        string temp = tag.Rows[r].Columns[c].ToNodePlainString();

                                                        if ((c + 1) % 2 == 0)
                                                        {
                                                            ctx += temp.GetReplace(":,:") + "\r\n";
                                                        }
                                                        else
                                                        {
                                                            ctx += temp.GetReplace(":,:") + ":";
                                                        }
                                                    }
                                                }
                                                if (rowName.Contains("中标候选人名称") || rowName.Contains("中标价"))
                                                {
                                                    break;
                                                }
                                            }
                                            bidUnit = ctx.GetBidRegex().GetReplace("第一名,第二名,第三名,名次");
                                            if (string.IsNullOrEmpty(bidUnit))
                                            {
                                                bidUnit = ctx.GetRegex("中标候选人名称");
                                            }
                                            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                            {
                                                bidMoney = ctx.GetMoneyRegex();
                                            }
                                            if (string.IsNullOrEmpty(buildUnit))
                                            {
                                                buildUnit = ctx.GetBuildRegex();
                                            }
                                            if (string.IsNullOrEmpty(prjMgr) || prjMgr.IsNumber())
                                            {
                                                prjMgr = ctx.GetMgrRegex();
                                            }
                                            if (string.IsNullOrEmpty(code))
                                            {
                                                code = ctx.GetCodeRegex().GetCodeDel();
                                            }
                                        }
                                    }
                                }
                            }

                            if (bidUnit.Contains("公司"))
                            {
                                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                            }
                            if (bidUnit.Contains("中标价"))
                            {
                                bidUnit = "";
                            }
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            List <string> imgList = new List <string>();
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                            if (imgNode != null && imgNode.Count > 0)
                            {
                                for (int m = 0; m < imgNode.Count; m++)
                                {
                                    ImageTag tag     = imgNode[m] as ImageTag;
                                    string   link    = tag.GetAttribute("src");
                                    string   webLink = "http://www.gsei.com.cn/" + link;
                                    HtmlTxt = HtmlTxt.GetReplace(link, webLink);
                                    imgList.Add(webLink);
                                }
                            }
                            if (!bidUnit.Contains("公司") && !bidUnit.Contains("研究院") && !bidUnit.Contains("管理局") && !bidUnit.Contains("院"))
                            {
                                bidUnit = "";
                            }
                            msgType  = "甘肃省信息中心";
                            specType = "政府采购";
                            bidType  = "建设工程";
                            BidInfo info = ToolDb.GenBidInfo("甘肃省", "甘肃省及地市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (imgList.Count > 0)
                            {
                                foreach (string img in imgList)
                                {
                                    string linkName = string.Empty;
                                    if (img.Contains("/"))
                                    {
                                        linkName = img.Substring(img.LastIndexOf("/"));
                                    }
                                    else
                                    {
                                        linkName = img;
                                    }
                                    BaseAttach attach = ToolDb.GenBaseAttach(linkName, info.Id, img);
                                    base.AttachList.Add(attach);
                                }
                            }
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.gsei.com.cn/" + a.Link.GetReplace("../,./");
                                        }
                                        if (Encoding.Default.GetByteCount(link) > 500)
                                        {
                                            continue;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, follows each row's link to its detail page and
        /// builds a <c>NoticeInfo</c> from the detail page's <c>TDContent</c> cell. Attachment anchors found
        /// inside that cell are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected records reaches
        /// <c>MaxCount</c>; when <c>true</c>, every page reported by the pager is visited.
        /// </param>
        /// <returns>
        /// The records collected so far. An empty list is returned when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <NoticeInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text holds the page count between "总页数" and "当前页".
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the pager's postback fields
                    // together with the view state taken from the previously loaded page.
                    string viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    }, new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];

                    // Columns 1 (title link) and 2 (date) are read below; skip rows that do not
                    // have them instead of letting an exception abort the whole crawl.
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string InfoType    = "澄清修改通知";
                    string InfoTitle   = aTag.GetAttribute("title");
                    string PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl     = "http://www.hbggzy.cn" + aTag.Link;
                    string prjCode     = string.Empty;
                    string buildUnit   = string.Empty;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt = dtlNode.AsHtml();
                    string InfoCtx = htmlTxt.GetReplace("</p>,<br />,<br/>", "\r\n").ToCtxString();
                    NoticeInfo info = ToolDb.GenNoticeInfo("湖北省", "湖北省及地市", "", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "湖北省公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "水利工程", string.Empty, htmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail content.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not already contain "http" are treated as site-relative.
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://www.hbggzy.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Crawls the paged procurement list at <c>SiteUrl</c>, follows each list item to its detail page
        /// and builds an <c>InviteInfo</c> from the <c>zw_c_c_cont</c> block. Attachment anchors found inside
        /// that block are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected records reaches
        /// <c>MaxCount</c>; when <c>true</c>, every computed page is visited.
        /// </param>
        /// <returns>
        /// The records collected so far. An empty list is returned when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Number of list entries requested per page; also sent as the "pageSize" form field.
            const int pageSize = 15;

            string postUrl = "http://www.gdgpo.gov.cn/queryMoreInfoList.do";
            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("name", "qPageForm")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The paging form text holds the total record count between "共" and "条".
                    string temp = pageNode[0].ToNodePlainString();
                    temp = temp.GetRegexBegEnd("共", "条");
                    int total = int.Parse(temp);

                    // Ceiling division: an exact multiple of pageSize must not add an extra, empty page.
                    pageInt = (total + pageSize - 1) / pageSize;
                    if (pageInt < 1)
                    {
                        pageInt = 1;
                    }
                }
                catch
                {
                    // Best effort: if the total cannot be parsed, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "channelCode",
                        "pointPageIndexId",
                        "pageIndex",
                        "pageSize"
                    }, new string[] {
                        "0005",
                        "1",
                        i.ToString(),
                        pageSize.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(postUrl, nvc);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "m_m_c_list")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];

                    // Skip list items without the expected link instead of letting a
                    // null dereference abort the whole crawl.
                    ATag aTag = node.GetATag(1);
                    if (aTag == null)
                    {
                        continue;
                    }

                    string beginDate = node.ToPlainTextString().GetDateRegex();
                    string prjName   = aTag.GetAttribute("title");
                    string InfoUrl   = "http://www.gdgpo.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zw_c_c_cont")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtlNode.AsHtml();
                    string inviteCtx = HtmlTxt.GetReplace("</p>,<br/>", "\r\n").ToCtxString();

                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    string inviteType = prjName.GetInviteBidType();
                    string specType   = "政府采购";
                    string msgType    = "广东省财政厅政府采购";

                    // These fields are not extracted for this site and are passed through empty.
                    string endDate   = string.Empty;
                    string remark    = string.Empty;
                    string otherType = string.Empty;

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail content.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNodes != null && aNodes.Count > 0)
                    {
                        for (int a = 0; a < aNodes.Count; a++)
                        {
                            ATag aFile = aNodes[a] as ATag;
                            if (aFile == null || !aFile.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not already contain "http" are treated as site-relative.
                            string link = aFile.Link.ToLower().Contains("http")
                                ? aFile.Link
                                : "http://www.gdgpo.gov.cn/" + aFile.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Ejemplo n.º 16
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "0h120")), true), new TagNameFilter("a")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode[sNode.Count - 2].ToNodePlainString();
                    pageInt = Convert.ToInt32(temp.GetReplace("[,]"));
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "0h120")));
                if (viewList != null && viewList.Count > 0)
                {
                    for (int j = 0; j < viewList.Count; j++)
                    {
                        TableTag table = viewList[j] as TableTag;
                        string   prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;
                        ATag     aTag = viewList[j].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        prjName   = aTag.GetAttribute("title").Trim();
                        beginDate = table.ToNodePlainString().GetDateRegex();
                        InfoUrl   = "http://baokou.huidong.gov.cn/" + aTag.Link;
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "fontzoom")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt = dtl.AsHtml();

                            if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果"))
                            {
                                string buildUnit = string.Empty, bidUnit = string.Empty,
                                       bidMoney = string.Empty, code = string.Empty,
                                       bidDate = string.Empty,
                                       endDate = string.Empty, bidType = string.Empty,
                                       specType = string.Empty,
                                       msgType = string.Empty, bidCtx = string.Empty,
                                       prjAddress = string.Empty, remark = string.Empty,
                                       prjMgr = string.Empty, otherType = string.Empty;
                                bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                                string tempName = string.Empty;
                                if (prjName.Length == 4)
                                {
                                    tempName = bidCtx.GetRegex("项目名称,工程名称");
                                }
                                if (!string.IsNullOrEmpty(tempName))
                                {
                                    prjName = tempName;
                                }

                                code      = bidCtx.GetCodeRegex().GetCodeDel();
                                buildUnit = bidCtx.GetBuildRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                bidUnit = bidCtx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.GetRegex("中标候选公司,中标候选人");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                {
                                    string ctx = bidCtx.GetReplace("元\r\n,元;\r\n", "元kdxx").GetRegexBegEnd("中标价", "kdxx");
                                    bidMoney = ctx.GetMoney("万元");
                                }
                                try
                                {
                                    if (decimal.Parse(bidMoney) > 100000)
                                    {
                                        bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                    }
                                }
                                catch { }
                                Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                                NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                string   src       = string.Empty;
                                if (imgNode != null && imgNode.Count > 0)
                                {
                                    string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                    src     = "http://baokou.huidong.gov.cn/" + imgUrl;
                                    HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                                }
                                msgType  = "惠东县宝口镇人民政府";
                                specType = "政府采购";
                                bidType  = prjName.GetInviteBidType();
                                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                if (!string.IsNullOrEmpty(src))
                                {
                                    string sql = string.Format("select Id from BidInfo where InfoUrl='{0}'", info.InfoUrl);
                                    object obj = ToolDb.ExecuteScalar(sql);
                                    if (obj == null || obj.ToString() == "")
                                    {
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "");
                                            }
                                        }
                                        catch { }
                                    }
                                }
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://baokou.huidong.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                            else
                            {
                                string code = string.Empty, buildUnit = string.Empty,
                                       prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                       specType = string.Empty, endDate = string.Empty,
                                       remark = string.Empty, inviteCon = string.Empty,
                                       CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty;

                                inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                                string tempName = string.Empty;
                                if (prjName.Length == 4)
                                {
                                    tempName = inviteCtx.GetRegex("项目名称,工程名称");
                                }
                                if (!string.IsNullOrEmpty(tempName))
                                {
                                    prjName = tempName;
                                }

                                inviteType = prjName.GetInviteBidType();

                                code       = inviteCtx.GetCodeRegex().GetCodeDel();
                                buildUnit  = inviteCtx.GetBuildRegex();
                                prjAddress = inviteCtx.GetAddressRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                                NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                string   src       = string.Empty;
                                if (imgNode != null && imgNode.Count > 0)
                                {
                                    string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                    src     = "http://baokou.huidong.gov.cn/" + imgUrl;
                                    HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                                }
                                msgType = "惠东县宝口镇人民政府";

                                specType = "政府采购";

                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!string.IsNullOrEmpty(src))
                                {
                                    string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", info.InfoUrl);
                                    object obj = ToolDb.ExecuteScalar(sql);
                                    if (obj == null || obj.ToString() == "")
                                    {
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "");
                                            }
                                        }
                                        catch { }
                                    }
                                }
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://baokou.huidong.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c> and builds one
        /// <c>NoticeInfo</c> (info type "资格预审") for every list entry whose
        /// detail page contains a <c>table</c> with class <c>bidtable</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records
        /// have been collected; when <c>true</c>, every listing page is processed.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first listing page cannot
        /// be downloaded. Attachment links found in each detail table are added to
        /// <c>base.AttachList</c> as a side effect.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<NoticeInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(list); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pageZone")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text is expected to contain ".../<total>页".
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    int    parsedPages;
                    // Only accept a positive count: a 0 or negative value would
                    // otherwise skip even the first page that is already downloaded.
                    if (int.TryParse(temp, out parsedPages) && parsedPages > 0)
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Deliberate best effort: if the pager text cannot be extracted,
                    // fall back to crawling the first page only.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i);
                    }
                    catch { continue; } // a page that fails to download is skipped
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "listZone")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link has no detail page to fetch.
                        continue;
                    }

                    string InfoType    = "资格预审";
                    string InfoTitle   = aTag.GetAttribute("title");
                    string PublistTime = node.ToPlainTextString().GetDateRegex();
                    string area        = aTag.LinkText.GetRegexBegEnd("【", "】");
                    string InfoUrl     = "http://www.hljztb.com/" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; } // skip entries whose detail page cannot be fetched

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "bidtable")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        Logger.Error("无内容");
                        Logger.Error(InfoUrl);
                        continue;
                    }

                    string   htmlTxt = dtlNode.AsHtml();
                    TableTag table   = dtlNode[0] as TableTag;

                    // Flatten the detail table into "label:value" lines: odd cells
                    // (1st, 3rd, ...) are followed by ":", even cells end the line.
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int r = 0; r < table.RowCount; r++)
                    {
                        for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                        {
                            string cellText = table.Rows[r].Columns[c].ToNodePlainString();
                            ctxBuilder.Append(cellText.GetReplace(":,:"));
                            ctxBuilder.Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                        }
                    }
                    string InfoCtx = ctxBuilder.ToString();

                    string buildUnit = InfoCtx.GetBuildRegex();
                    string prjCode   = InfoCtx.GetRegex("编码");

                    NoticeInfo info = ToolDb.GenNoticeInfo("黑龙江省", "黑龙江省及地市", area, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "黑龙江住房和城乡建设厅", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "建设工程", string.Empty, htmlTxt);
                    list.Add(info);

                    // Collect attachment links from the anchors inside the detail table.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link;
                            if (a.Link.ToLower().Contains("http"))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.hljztb.com/" + a.Link.GetReplace("../,./");
                            }

                            // Links longer than 500 bytes are dropped
                            // (presumably a storage limit - TODO confirm).
                            if (Encoding.Default.GetByteCount(link) > 500)
                            {
                                continue;
                            }

                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Crawls up to 400 listing pages of xxgk.dgca.gov.cn. Rows whose title
        /// contains "中标" become <c>BidInfo</c> records, rows whose title contains
        /// "招标" become <c>InviteInfo</c> records; all other rows are ignored.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records of
        /// either kind have been collected; when <c>true</c>, all pages are processed.
        /// </param>
        /// <returns>
        /// A mixed list of <c>BidInfo</c> and <c>InviteInfo</c> records; empty when the
        /// first listing page cannot be downloaded. Attachment links found in each
        /// detail page are added to <c>base.AttachList</c> as a side effect.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 400;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return(list); }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://xxgk.dgca.gov.cn/dgca/1100/caxxgklist_" + i + ".shtml");
                    }
                    catch { continue; } // a page that fails to download is skipped
                }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "rightbobj01 list_bg")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                // The first and last rows are skipped
                // (presumably header and pager rows - TODO confirm).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 0..2 (code, title link, date) are all required below.
                        continue;
                    }

                    ATag atag = tr.Columns[1].GetATag();
                    if (atag == null)
                    {
                        // A row without a link has no detail page to fetch.
                        continue;
                    }

                    string prjName = atag.GetAttribute("title").GetReplace(" ");
                    bool   isBid   = prjName.Contains("中标");
                    if (!isBid && !prjName.Contains("招标"))
                    {
                        continue;
                    }

                    string code      = tr.Columns[0].ToNodePlainString();
                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = "http://xxgk.dgca.gov.cn/" + atag.Link.GetReplace("../");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; } // skip rows whose detail page cannot be fetched

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "zoom")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    // Shared by both record kinds: lower-cased detail HTML and its
                    // plain-text form with paragraph/line-break tags turned into CRLF.
                    string HtmlTxt   = dtlNode.AsHtml().ToLower();
                    string ctx       = HtmlTxt.GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                    string specType  = "政府采购";
                    string msgType   = "东莞市长安镇政府";
                    string endDate   = string.Empty;
                    string otherType = string.Empty;

                    if (isBid)
                    {
                        string buildUnit = ctx.GetBuildRegex();
                        string bidUnit   = ctx.GetBidRegex();
                        string bidMoney  = ctx.GetMoneyRegex(new string[] { "中标值" });
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            // Fall back to the default money patterns.
                            bidMoney = ctx.GetMoneyRegex();
                        }
                        string prjMgr  = ctx.GetMgrRegex();
                        string bidType = prjName.GetInviteBidType();

                        BidInfo info = ToolDb.GenBidInfo("广东省", "东莞市区", "长安镇", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, ctx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                        list.Add(info);

                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k].GetATag();
                                if (a == null || !a.IsAtagAttach())
                                {
                                    continue;
                                }
                                string link = a.Link.ToLower().Contains("http")
                                    ? a.Link
                                    : "http://xxgk.dgca.gov.cn/" + a.Link;
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }
                    else
                    {
                        string remark     = string.Empty;
                        string buildUnit  = ctx.GetBuildRegex();
                        string prjAddress = ctx.GetAddressRegex();

                        if (buildUnit.Contains("公司"))
                        {
                            // Cut everything after the first "公司".
                            buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                        }

                        string inviteType = prjName.GetInviteBidType();

                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "长安镇", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);

                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k].GetATag();
                                if (a == null || !a.IsAtagAttach())
                                {
                                    continue;
                                }
                                string link = a.Link.ToLower().Contains("http")
                                    ? a.Link
                                    : "http://xxgk.dgca.gov.cn/" + a.Link;
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    // Honour MaxCount for both record kinds (previously only the
                    // "招标" branch checked it, so "中标" records never stopped the crawl).
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }

                    // Pause for 600 seconds after every 20 collected records
                    // (presumably to throttle requests to the site - TODO confirm).
                    if (list.Count % 20 == 0)
                    {
                        Thread.Sleep(1000 * 600);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 19
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1000;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "searchIndex1$tbx_Content",
                        "searchIndex1$ddl_Type"
                    }, new string[] {
                        "GV_Data",
                        "Page$" + i,
                        viewState,
                        "",
                        eventValidation,
                        "--标题关键字--",
                        "4"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
                    }
                    catch { continue; }
                }
                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GV_Data")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount - 1; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        prjName   = aTag.LinkText;
                        beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        string htmldtl = string.Empty;
                        string postid  = aTag.GetAttribute("href").GetRegexBegEnd("'", "'");
                        try
                        {
                            htmldtl = System.Web.HttpUtility.HtmlDecode(GetHtml(html, postid).GetJsString());
                        }
                        catch { }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "3")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            parser.Reset();
                            NodeList formNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("id", "form1")));
                            if (formNode != null && formNode.Count > 0)
                            {
                                InfoUrl = "http://www.hrbjjzx.cn/Bid_Front/" + (formNode[0] as FormTag).GetAttribute("action");
                            }
                            else
                            {
                                continue;
                            }
                            HtmlTxt   = dtlNode.AsHtml().GetReplace("<br>", "<br />");
                            inviteCtx = HtmlTxt.ToLower().GetReplace("<br/>,<br>,</p>", "\r\n").GetReplace("untitleddocument").ToCtxString().GetReplace("untitleddocument, , ");
                            TableTag tag = dtlNode[dtlNode.Count - 1] as TableTag;
                            string   ctx = string.Empty;
                            for (int r = 0; r < tag.RowCount; r++)
                            {
                                if (r > 2)
                                {
                                    break;
                                }
                                for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                {
                                    string temp = tag.Rows[r].Columns[c].ToNodePlainString();
                                    if ((c + 1) % 2 == 0)
                                    {
                                        ctx += temp.GetReplace(":,:, ") + "\r\n";
                                    }
                                    else
                                    {
                                        ctx += temp.GetReplace(":,:, ") + ":";
                                    }
                                }
                            }
                            if (prjName.Contains("..."))
                            {
                                prjName = ctx.GetRegex("项目");
                            }
                            buildUnit  = ctx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex().GetReplace(" ");
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            if (buildUnit.Contains("联系"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                            }
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            code = inviteCtx.GetCodeRegex().GetCodeDel();
                            if (string.IsNullOrEmpty(code))
                            {
                                code = inviteCtx.GetRegex("编码");
                            }
                            msgType  = "哈尔滨建设工程交易中心";
                            specType = inviteType = "建设工程";
                            InviteInfo info = ToolDb.GenInviteInfo("黑龙江省", "黑龙江省及地市", "哈尔滨市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);

                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c> and builds one <c>BidInfo</c> for each
        /// listing row whose detail page contains a <c>td</c> with class <c>con</c>.
        /// Attachment links found inside that element are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the result list holds
        /// <c>MaxCount</c> entries; when <c>true</c>, every page reported by the pager is attempted.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList results = new List<BidInfo>();
            string listingHtml = string.Empty;

            try
            {
                listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // The first listing page is required; give up with an empty result.
                return results;
            }

            // The page count is taken from a link in the last row of the last width="693" table.
            // Any failure while reading it leaves the crawl at a single page.
            int totalPages = 1;
            Parser parser = new Parser(new Lexer(listingHtml));
            NodeList pagerTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "693")));
            if (pagerTables != null && pagerTables.Count > 0)
            {
                TableTag pagerTable = pagerTables[pagerTables.Count - 1] as TableTag;
                try
                {
                    TableRow pagerRow = pagerTable.Rows[pagerTable.RowCount - 1];
                    ATag pagerLink = pagerRow.Columns[1].GetATag(1);
                    // Strip the fixed query-string prefix; what remains is parsed as the page number.
                    string pageNumberText = pagerLink.GetAttribute("href").GetReplace("/www/zbgs.asp?qyfl=%&qydz=%&qymc=%&native_place=&post_title=&polity_identity=&bz=&act=&typeid=&curyear=&query_like=&query_like_logic=&query_like_input=&query_logic_1=&query_logic_2=&query_ct_type=&query_date=&query_date_logic=''&queryyear=&querymonth=&queryday=&curpagenum=");
                    totalPages = int.Parse(pageNumberText);
                }
                catch
                {
                }
            }

            // These fields are never extracted for this site and are passed on empty.
            string area = string.Empty;
            string endDate = string.Empty;
            string otherType = string.Empty;

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?curpagenum=" + page, Encoding.Default);
                    }
                    catch
                    {
                        // Skip pages that fail to download.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(listingHtml));
                NodeList listTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "693")));
                if (listTables == null || listTables.Count <= 0)
                {
                    continue;
                }

                TableTag listTable = listTables[listTables.Count - 1] as TableTag;

                // The last row of the table is not iterated (the pager link above is read from it).
                for (int row = 0; row < listTable.RowCount - 1; row++)
                {
                    TableRow tr = listTable.Rows[row];
                    ATag titleLink = tr.Columns[1].GetATag();
                    string prjName = titleLink.LinkText;
                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex("yyyy年MM月dd日");
                    string infoUrl = "http://www.qhbid.com.cn/www/" + titleLink.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList contentCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "con")));
                    if (contentCells == null || contentCells.Count <= 0)
                    {
                        continue;
                    }

                    string htmlTxt = contentCells.AsHtml().GetReplace("</p>,<br/>", "\r\n");
                    string bidCtx = htmlTxt.ToCtxString();
                    string buildUnit = bidCtx.GetBuildRegex();

                    // Winning bidder: try the generic extractor first, then two site-specific fallbacks.
                    string bidUnit = bidCtx.GetBidRegex();
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegex("(1)", false);
                    }
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.GetReplace("中标候选人:\r\n", "中标候选人:").GetRegex("中标候选人");
                    }

                    string bidMoney = bidCtx.GetMoneyRegex();
                    if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                    {
                        bidMoney = bidCtx.GetRegex("中标金(人民币)", false).GetMoney();
                    }

                    string prjMgr = bidCtx.GetMgrRegex();
                    if (string.IsNullOrEmpty(prjMgr))
                    {
                        prjMgr = bidCtx.GetRegex("项目负责人(建造师),建造师,项目经理");
                    }

                    string code = bidCtx.GetCodeRegex().GetCodeDel();
                    string specType = "建设工程";
                    string bidType = specType;
                    string msgType = "青海省建设工程招标投标管理办公室";

                    BidInfo info = ToolDb.GenBidInfo("青海省", "青海省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
                    results.Add(info);

                    // Record every attachment-style link found in the detail content.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null)
                    {
                        for (int idx = 0; idx < anchors.Count; idx++)
                        {
                            ATag anchor = anchors[idx] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = anchor.Link.ToLower().Contains("http")
                                ? anchor.Link
                                : "http://www.qhbid.com.cn/" + anchor.Link.GetReplace("../,./");

                            // Links longer than 500 bytes (in the default encoding) are not stored.
                            if (Encoding.Default.GetByteCount(link) > 500)
                            {
                                continue;
                            }

                            BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return results;
                    }
                }
            }

            return results;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c> (follow-up pages are fetched from
        /// <c>http://www.hyggzy.com/zfzbggxxyz/index_N.shtml</c>) and builds one <c>BidInfo</c>
        /// for each listing entry whose detail page contains a
        /// <c>div class="tab-cnt-item current"</c>. Attachment links found in that element are
        /// appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the result list holds
        /// <c>MaxCount</c> entries; when <c>true</c>, every page reported by the pager is attempted.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<BidInfo>();
            int pageInt = 1;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                return list;
            }

            Parser parser = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch((new TagNameFilter("div")));

            // The pager text is read from the 32nd div on the page. Guard the index and the
            // parse so that a layout change degrades to a single-page crawl instead of
            // throwing out of the whole method.
            if (pageNode != null && pageNode.Count > 31)
            {
                try
                {
                    string temp = pageNode[31].ToNodePlainString().GetRegexBegEnd("/", "页");
                    int parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: keep the default of one page.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page i is served as index_{i-1}.shtml.
                        int emp = i - 1;
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.hyggzy.com/zfzbggxxyz/index_" + emp + ".shtml");
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list1")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty, specType = string.Empty,
                           InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    ATag aTag = nodeList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link cannot be followed; skip it rather than abort the crawl.
                        continue;
                    }
                    prjName = aTag.GetAttribute("title");
                    InfoUrl = aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tab-cnt-item current")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    bidCtx  = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();

                    // NOTE(review): beginDate has not been populated from the listing at this point,
                    // so this call receives an empty string; the date effectively comes from the
                    // "发布时间" fallback below. Confirm whether the listing date should be read instead.
                    beginDate = beginDate.GetDateRegex();
                    if (string.IsNullOrWhiteSpace(beginDate))
                    {
                        beginDate = bidCtx.GetRegex("发布时间");
                        beginDate = beginDate.GetDateRegex();
                    }

                    bidUnit = bidCtx.GetBidRegex();

                    // bidMoney is still empty here, so the amount is always looked up in the text.
                    try
                    {
                        bidMoney = bidCtx.GetRegex("第一标段中标金额");
                        bidMoney = bidMoney.GetRegexBegEnd("¥", "元");
                    }
                    catch
                    {
                        // Best effort: keep whatever was extracted before the failure.
                    }

                    buildUnit = bidCtx.GetBuildRegex();
                    if (string.IsNullOrWhiteSpace(buildUnit))
                    {
                        buildUnit = bidCtx.GetRegex("委托单位名称");
                    }

                    code = bidCtx.GetCodeRegex();
                    if (string.IsNullOrWhiteSpace(code))
                    {
                        code = bidCtx.GetRegexBegEnd("编号", "采购").GetReplace("\r\n", "");
                    }

                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        // Fallback: pair the header row and first data row of the first table under
                        // div.content-cnt as "header:value" lines and search those for the winner.
                        try
                        {
                            parser = new Parser(new Lexer(htmldtl));
                            NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "content-cnt")), true), new TagNameFilter("table")));
                            if (dtl != null && dtl.Count > 0)
                            {
                                TableTag dl      = dtl[0] as TableTag;
                                string   bidCtxt = string.Empty;
                                for (int c = 0; c < dl.Rows[0].ColumnCount; c++)
                                {
                                    bidCtxt += dl.Rows[0].Columns[c].ToNodePlainString() + ":";
                                    bidCtxt += dl.Rows[1].Columns[c].ToNodePlainString() + "\r\n";
                                }
                                bidUnit = bidCtxt.GetRegex("第一中标人");
                            }
                        }
                        catch
                        {
                            // Best effort: leave bidUnit empty when the table has an unexpected shape.
                        }
                    }

                    // Amounts above 100000 are divided by 10000 before being stored.
                    decimal amount;
                    if (decimal.TryParse(bidMoney, out amount) && amount > 100000)
                    {
                        bidMoney = (amount / 10000).ToString();
                    }

                    msgType  = "河源市公共资源交易中心";
                    specType = "政府采购";
                    bidType  = ToolHtml.GetInviteTypes(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "河源市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Record every attachment-style link found in the detail content.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = string.Empty;
                            if (a.Link.ToLower().Contains("http"))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.hyggzy.com" + a.Link;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c> (follow-up pages use <c>?Paging=N</c>) and
        /// builds one <c>NoticeInfo</c> of type "澄清公告" for each listing row whose detail page
        /// contains a <c>td id="TDContent"</c>. Attachment links found in that element are
        /// appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the result list holds
        /// <c>MaxCount</c> entries; when <c>true</c>, every page reported by the pager is attempted.
        /// </param>
        /// <returns>
        /// The collected <c>NoticeInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList notices = new List<NoticeInfo>();
            string listingHtml = string.Empty;

            try
            {
                listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // The first listing page is required; give up with an empty result.
                return notices;
            }

            // Page count is the text between "总页数" and "当前页" in the nowrap cells;
            // any failure while reading it leaves the crawl at a single page.
            int totalPages = 1;
            Parser parser = new Parser(new Lexer(listingHtml));
            NodeList pagerCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
            if (pagerCells != null && pagerCells.Count > 0)
            {
                try
                {
                    string pageCountText = pagerCells.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    totalPages = int.Parse(pageCountText);
                }
                catch
                {
                }
            }

            // These fields are never extracted for this site and are passed on empty.
            string area = string.Empty;
            string prjCode = string.Empty;

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?Paging=" + page, Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip pages that fail to download.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(listingHtml));
                NodeList listTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "99%")));
                if (listTables == null || listTables.Count <= 0)
                {
                    continue;
                }

                TableTag listTable = listTables[0] as TableTag;

                // The last row of the table is not iterated.
                for (int row = 0; row < listTable.RowCount - 1; row++)
                {
                    TableRow tr = listTable.Rows[row];
                    ATag titleLink = tr.Columns[1].GetATag();
                    string infoTitle = titleLink.GetAttribute("title");
                    string publishTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl = "http://www.gxzbtb.cn" + titleLink.Link;
                    string infoType = "澄清公告";

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList contentCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (contentCells == null || contentCells.Count <= 0)
                    {
                        continue;
                    }

                    string htmlTxt = contentCells.AsHtml().GetJsString();
                    string infoCtx = htmlTxt.ToCtxString();
                    string buildUnit = infoCtx.GetBuildRegex();

                    NoticeInfo info = ToolDb.GenNoticeInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, infoTitle, infoType, infoCtx, publishTime, string.Empty, "广西壮族自治区公共资源交易中心", infoUrl, prjCode, buildUnit, string.Empty, string.Empty, "建设工程", string.Empty, htmlTxt);

                    // Attachments are registered before the notice itself is added to the result list.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null)
                    {
                        for (int idx = 0; idx < anchors.Count; idx++)
                        {
                            ATag anchor = anchors[idx].GetATag();
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = anchor.Link.ToLower().Contains("http")
                                ? anchor.Link
                                : "http://www.gxzbtb.cn" + anchor.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    notices.Add(info);
                    if (!crawlAll && notices.Count >= this.MaxCount)
                    {
                        return notices;
                    }
                }
            }

            return notices;
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Crawls the ASP.NET post-back paged listing at <c>SiteUrl</c> and builds one
        /// <c>InviteInfo</c> for each row of the <c>MoreInfoList1_DataGrid1</c> table whose detail
        /// page contains a <c>span id="zygg_Text_23"</c>. Attachment links found in that element
        /// are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the result list holds
        /// <c>MaxCount</c> entries; when <c>true</c>, every page reported by the pager is attempted.
        /// </param>
        /// <returns>The collected <c>InviteInfo</c> records.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList results = new List<InviteInfo>();
            string cookies = string.Empty;
            string pageHtml = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookies);
            }
            catch
            {
                // A failed first download is tolerated here; processing continues with
                // whatever pageHtml holds (the empty string).
            }

            // Page count is the text between "1/" and "页" in the pager div;
            // any failure while reading it leaves the crawl at a single page.
            int totalPages = 1;
            Parser parser = new Parser(new Lexer(pageHtml));
            NodeList pagerNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));
            if (pagerNodes != null && pagerNodes.Count > 0)
            {
                try
                {
                    string pageCountText = pagerNodes[0].ToNodePlainString().GetRegexBegEnd("1/", "页");
                    totalPages = int.Parse(pageCountText);
                }
                catch
                {
                }
            }

            // These fields are never extracted for this site and are passed on empty.
            string endDate = string.Empty;
            string remark = string.Empty;
            string otherType = string.Empty;

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    // Later pages are requested by posting the pager event back to the same URL,
                    // carrying the hidden form fields taken from the most recently fetched page.
                    string viewState = this.ToolWebSite.GetAspNetViewState(pageHtml);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(pageHtml);
                    string csrfToken = ToolHtml.GetHtmlInputValue(pageHtml, "__CSRFTOKEN");

                    string[] fieldNames = new string[]
                    {
                        "__CSRFTOKEN",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION",
                        "MoreInfoList1$txtProjectName",
                        "MoreInfoList1$txtBiaoDuanName",
                        "MoreInfoList1$txtBiaoDuanNo",
                        "MoreInfoList1$txtJSDW",
                        "MoreInfoList1$StartDate",
                        "MoreInfoList1$EndDate",
                        "MoreInfoList1$jpdDi",
                        "MoreInfoList1$jpdXian"
                    };
                    string[] fieldValues = new string[]
                    {
                        csrfToken,
                        "MoreInfoList1$Pager",
                        page.ToString(),
                        "",
                        viewState,
                        "76D0A3AC",
                        eventValidation,
                        "", "", "", "", "", "",
                        "-1", "-1"
                    };
                    NameValueCollection postData = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);

                    try
                    {
                        cookies = cookies.GetReplace("path=/; HttpOnly").Replace(",", "");
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, postData, Encoding.Default, ref cookies);
                    }
                    catch
                    {
                        // Skip pages that fail to download.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(pageHtml));
                NodeList gridTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (gridTables == null || gridTables.Count <= 0)
                {
                    continue;
                }

                TableTag grid = gridTables[0] as TableTag;
                for (int row = 0; row < grid.RowCount; row++)
                {
                    TableRow tr = grid.Rows[row];
                    ATag titleLink = tr.Columns[1].GetATag();

                    // The title may carry a bracketed prefix; it is extracted as the area and,
                    // when present, its "[...]" form is removed from the project name.
                    string prjName = titleLink.GetAttribute("title").GetReplace(";");
                    string area = prjName.GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    if (!string.IsNullOrEmpty(area))
                    {
                        prjName = prjName.GetReplace("[" + area + "]");
                    }

                    string beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex();

                    // The detail path is the first argument of the link's onclick handler.
                    string infoUrl = "http://www.jszb.com.cn/jszb/YW_info/" + titleLink.GetAttribute("onclick").Replace("(", "(").GetRegexBegEnd("(", ",").GetReplace("\",../,./");

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList contentSpans = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "zygg_Text_23")));
                    if (contentSpans == null || contentSpans.Count <= 0)
                    {
                        continue;
                    }

                    string htmlTxt = contentSpans.AsHtml();
                    string inviteCtx = htmlTxt.ToCtxString();
                    string prjAddress = inviteCtx.GetAddressRegex();

                    // Trim the owner name: keep text up to and including "公司", then cut at "地址".
                    string buildUnit = inviteCtx.GetBuildRegex();
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }

                    string code = inviteCtx.GetCodeRegex().GetCodeDel();
                    string msgType = "江苏省建设工程招标投标办公室";
                    string specType = "建设工程";
                    string inviteType = "建设工程";

                    InviteInfo info = ToolDb.GenInviteInfo("江苏省", "江苏省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, htmlTxt);
                    results.Add(info);

                    // Record every attachment-style link found in the detail content.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null)
                    {
                        for (int idx = 0; idx < anchors.Count; idx++)
                        {
                            ATag anchor = anchors[idx] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = anchor.Link.ToLower().Contains("http")
                                ? anchor.Link
                                : "http://www.jszb.com.cn/" + anchor.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return results;
                    }
                }
            }

            return results;
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Downloads the announcement list that starts at <c>SiteUrl</c>, walks every list page,
        /// and builds one <c>BidInfo</c> for each entry whose detail page contains a
        /// <c>div</c> with class <c>tit-content</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> items; the list is empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "yesh fl")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The text between "/" and "页" in the pager node is parsed as the page count.
                    string temp = sNode[0].ToNodePlainString().GetRegexBegEnd("/", "页");
                    int    parsedPages;
                    pageInt = int.TryParse(temp, out parsedPages) ? parsedPages : 1;
                }
                catch
                {
                    // Best effort: if the pager text cannot be extracted, crawl the first page only.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.szlhxq.gov.cn/mzbsc/zwgk69/cgzb/zbgg21/14844-" + i.ToString() + ".html", Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip a list page that cannot be downloaded and try the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news1_list")), true), new TagNameFilter("li")));
                if (viewList == null || viewList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < viewList.Count; j++)
                {
                    string beginDate = viewList[j].ToNodePlainString().GetDateRegex();

                    ATag aTag = viewList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link cannot be followed; skip it instead of
                        // failing the whole crawl with a NullReferenceException.
                        continue;
                    }

                    string prjName = aTag.GetAttribute("title");
                    string InfoUrl = "http://www.szlhxq.gov.cn" + aTag.Link;
                    string htmDtl  = string.Empty;
                    try
                    {
                        htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // Skip an announcement whose detail page cannot be downloaded.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tit-content")));
                    if (dtl == null || dtl.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtl.AsHtml();

                    // Strip script blocks, turn <br> variants into line breaks, drop the remaining
                    // tags and collapse repeated blank lines to obtain the plain-text content.
                    string bidCtx = System.Text.RegularExpressions.Regex.Replace(HtmlTxt, "(<script)[\\s\\S]*?(</script>)", "");
                    bidCtx = System.Text.RegularExpressions.Regex.Replace(bidCtx.Replace("<br/>", "\r\n").Replace("<BR/>", "\r\n").Replace("<BR>", "\r\n").Replace("<br>", "\r\n"), "<[^>]*>", "").Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\r\n", "\r\n").Replace("\r\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                    string code      = ToolHtml.GetRegexString(bidCtx, ToolHtml.CodeRegex, true, 50);
                    string buildUnit = ToolHtml.GetRegexString(bidCtx, ToolHtml.BuildRegex, true, 150);
                    string bidMoney  = ToolHtml.GetRegexString(bidCtx, ToolHtml.MoneyRegex, false);
                    string bidUnit   = ToolHtml.GetRegexString(bidCtx, ToolHtml.BidRegex, true, 150);
                    string prjMgr    = ToolHtml.GetRegexString(bidCtx, ToolHtml.MgrRegex, true, 50);
                    bidMoney = ToolHtml.GetRegexMoney(bidMoney);
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        // Fall back to a fixed build unit when none is found in the text.
                        buildUnit = "深圳市龙华新区民治街道办事处";
                    }

                    string msgType  = "深圳市龙华新区民治街道办事处";
                    string specType = "建设工程";
                    // The bid type is a fixed value. A title-derived value used to be computed
                    // first but was always overwritten before use, so it is no longer computed.
                    string bidType   = "小型工程";
                    string endDate   = string.Empty;
                    string otherType = string.Empty;
                    prjName = ToolDb.GetPrjName(prjName);

                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                     bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Downloads the announcement list at <c>SiteUrl</c>, walks the additional
        /// <c>index_N.html</c> list pages, and builds one <c>InviteInfo</c> for each entry whose
        /// detail page contains a <c>div</c> with id <c>detailContent</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> items; the list is empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "epages")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The href of the last pager link is reduced to the digits between "index"
                    // and "htm", which are parsed as the page count.
                    string temp = pageNode[pageNode.Count - 1].GetATagHref().GetRegexBegEnd("index", "htm").Replace("_", "").Replace(".", "");
                    int    parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: if the pager link cannot be read, keep the default of one page.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "index_" + i + ".html", Encoding.Default);
                    }
                    catch
                    {
                        // Skip a list page that cannot be downloaded and try the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "gzcysublist")), true), new TagNameFilter("a")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // Skip a node that yields no link instead of failing the whole crawl
                        // with a NullReferenceException.
                        continue;
                    }

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = aTag.LinkText.GetDateRegex();
                    string InfoUrl   = "http://www.jxjst.gov.cn" + aTag.Link;
                    string htmldtl   = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Skip an announcement whose detail page cannot be downloaded.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "detailContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt    = dtlNode.AsHtml();
                    string inviteCtx  = HtmlTxt.ToCtxString();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string buildUnit  = inviteCtx.GetBuildRegex();
                    if (buildUnit.Contains("单位章"))
                    {
                        // A match containing "单位章" is discarded entirely.
                        buildUnit = string.Empty;
                    }
                    if (buildUnit.Contains("联系人"))
                    {
                        // Cut off everything from "联系人" onwards.
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系人"));
                    }

                    string code       = inviteCtx.GetCodeRegex();
                    string inviteType = prjName.GetInviteBidType();
                    string specType   = "建设工程";
                    string msgType    = "江西省住房和城乡建设厅";
                    string endDate    = string.Empty;
                    string remark     = string.Empty;
                    string otherType  = string.Empty;

                    InviteInfo info = ToolDb.GenInviteInfo("江西省", "江西省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Crawls every category listed in <c>AllSiteUrl</c>: for each key the list pages at
        /// <c>SiteUrl + AllSiteUrl[key]</c> are walked and one <c>InviteInfo</c> is built for each
        /// row whose detail page contains a <c>td</c> with id <c>TDContent</c>. Attachment links
        /// found in the detail content are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling of a category stops once <c>MaxCount</c> items have been
        /// collected for that category; the remaining categories are still crawled.
        /// </param>
        /// <returns>The <c>InviteInfo</c> items collected across all categories.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <InviteInfo>();

            foreach (string siteUrl in AllSiteUrl.Keys)
            {
                int    result    = 0;   // items collected for this category only
                string webUrl    = this.SiteUrl + AllSiteUrl[siteUrl];
                string html      = string.Empty;
                string cookiestr = string.Empty;
                int    pageInt   = 1;
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(webUrl, Encoding.UTF8, ref cookiestr);
                }
                catch
                {
                    // One unreachable category must not discard the remaining categories,
                    // so move on to the next one instead of returning early.
                    continue;
                }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "Paging")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    // The text between "总页数:" and "当前" in the pager cell is parsed as the page count.
                    string temp = nodeList.AsString().GetRegexBegEnd("总页数:", "当前");
                    int    parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }

                // Set once the per-category limit is reached; ends both loops below.
                bool limitReached = false;
                for (int i = 1; i <= pageInt && !limitReached; i++)
                {
                    if (i > 1)
                    {
                        try
                        {
                            html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(webUrl + "?Paging=" + i.ToString()));
                        }
                        catch
                        {
                            // Skip a list page that cannot be downloaded and try the next one.
                            continue;
                        }
                    }

                    parser = new Parser(new Lexer(html));
                    NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("valign", "top")));
                    if (tableNodeList == null || tableNodeList.Count == 0)
                    {
                        continue;
                    }

                    TableTag table = (TableTag)tableNodeList[0];
                    // The last two rows of the table are not processed.
                    for (int j = 0; j < table.RowCount - 2; j++)
                    {
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }

                        string prjName    = aTag.GetAttribute("title");
                        string InfoUrl    = "http://jyzx.maoming.gov.cn" + aTag.Link;
                        string beginDate  = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        }
                        catch
                        {
                            // Skip an announcement whose detail page cannot be downloaded.
                            continue;
                        }

                        parser = new Parser(new Lexer(htmldetail));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode == null || dtlNode.Count == 0)
                        {
                            continue;
                        }

                        string HtmlTxt   = dtlNode.AsHtml();
                        string inviteCtx = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();

                        string buildUnit  = inviteCtx.GetBuildRegex();
                        string prjAddress = inviteCtx.GetAddressRegex();
                        string code       = inviteCtx.GetCodeRegex().GetCodeDel();
                        // The AllSiteUrl key is stored as the invite type.
                        string inviteType = siteUrl;

                        string msgType   = "茂名市公共资源交易中心";
                        string specType  = "建设工程";
                        string endDate   = string.Empty;
                        string remark    = string.Empty;
                        string otherType = string.Empty;

                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "茂名市区", "",
                                                               string.Empty, code, prjName, prjAddress, buildUnit,
                                                               beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);
                        result++;

                        // Register every attachment link found in the detail content.
                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag fileTag = aNode[k] as ATag;
                                if (fileTag.IsAtagAttach())
                                {
                                    string fileUrl = string.Empty;
                                    if (fileTag.Link.Contains("http"))
                                    {
                                        fileUrl = fileTag.Link;
                                    }
                                    else
                                    {
                                        // Links without "http" are prefixed with the site root.
                                        fileUrl = "http://jyzx.maoming.gov.cn/" + fileTag.Link;
                                    }

                                    base.AttachList.Add(ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, fileUrl));
                                }
                            }
                        }

                        if (result >= this.MaxCount && !crawlAll)
                        {
                            // Per-category limit reached: leave the row loop; the page loop
                            // ends through its condition and the next category starts.
                            limitReached = true;
                            break;
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Downloads the result list at <c>SiteUrl</c>, pages through it by posting the ASP.NET
        /// pager event, and builds one <c>BidInfo</c> for each row whose detail page contains a
        /// <c>table</c> with id <c>tblInfo</c>. Attachment links found in the detail content are
        /// added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> items; the list is empty (never <c>null</c>) when the first
        /// list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <BidInfo>();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Return the empty list rather than null so callers always receive a collection.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zbjgmore2_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The text between "总页数:" and "当前" in the pager is parsed as the page count.
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    int    parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: if the pager text cannot be read, keep the default of one page.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the pager event together with the
                    // state fields taken from the previously downloaded page.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        viewState,
                        "zbjgmore2$Pager",
                        i.ToString(),
                        eventValidation
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        // Skip a list page that cannot be downloaded and try the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "zbjgmore2_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 2)
                    {
                        // The link is read from the second column; rows without it are skipped.
                        continue;
                    }

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // Skip a row without a link instead of failing the whole crawl
                        // with a NullReferenceException.
                        continue;
                    }

                    string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                           code = string.Empty, prjMgr = string.Empty,
                           endDate = string.Empty, otherType = string.Empty;

                    string prjName = aTag.GetAttribute("title");
                    string area    = tr.ToNodePlainString().GetRegexBegEnd("【", "】");
                    string InfoUrl = "http://www.lnzb.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // Skip an announcement whose detail page cannot be downloaded.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblInfo")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtlNode.AsHtml();
                    string bidCtx  = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "_Sheet1")));
                    TableTag tag       = (tableNode != null && tableNode.Count > 0) ? tableNode[0] as TableTag : null;
                    if (tag != null)
                    {
                        // Flatten the sheet into text: each odd-position cell is followed by ":"
                        // and each even-position cell by a line break (presumably label/value
                        // pairs — confirm against the page layout).
                        StringBuilder ctxBuilder = new StringBuilder();
                        for (int r = 0; r < tag.RowCount; r++)
                        {
                            for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                            {
                                string temp = tag.Rows[r].Columns[c].ToNodePlainString();
                                ctxBuilder.Append(temp.GetReplace(":,:"));
                                ctxBuilder.Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                            }
                        }

                        string ctx = ctxBuilder.ToString();
                        buildUnit = ctx.GetBuildRegex();
                        bidUnit   = ctx.GetBidRegex();
                        bidMoney  = ctx.GetMoneyRegex();
                        code      = ctx.GetCodeRegex();
                        prjMgr    = ctx.GetMgrRegex();
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = ctx.GetRegex("项目负责人姓名");
                        }
                    }

                    string beginDate = bidCtx.GetRegex("发布时间").GetDateRegex("yyyy/MM/dd");
                    string msgType   = "辽宁省建设厅招标投标管理处";
                    string specType  = "建设工程";
                    string bidType   = "勘察设计";
                    BidInfo info = ToolDb.GenBidInfo("辽宁省", "辽宁省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Register every attachment link found in the detail content.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a.IsAtagAttach())
                            {
                                string link = string.Empty;
                                if (a.Link.ToLower().Contains("http"))
                                {
                                    link = a.Link;
                                }
                                else
                                {
                                    // Links without "http" are prefixed with the site root.
                                    link = "http://www.lnzb.cn/" + a.Link.GetReplace("../,./");
                                }
                                if (Encoding.Default.GetByteCount(link) > 500)
                                {
                                    // Links longer than 500 bytes are not recorded.
                                    continue;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 28
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <NotifyInfo>();
            int    pageInt = 1, sqlCount = 0;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("总页数", "当前").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    },
                        new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string   headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        headName    = aTag.GetAttribute("title");
                        releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        infoUrl     = "http://www.gaxqjyzx.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            ctxHtml = dtlNode.AsHtml();
                            infoCtx = ctxHtml.ToCtxString();


                            msgType = "贵安新区公共资源交易中心";
                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "贵州省", "贵州省及地市", "贵安新区", infoCtx, "通知公告");
                            sqlCount++;
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                parser = new Parser(new Lexer(ctxHtml));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag fileATag = aNode[k].GetATag();
                                        if (fileATag.IsAtagAttach())
                                        {
                                            BaseAttach obj = null;
                                            try
                                            {
                                                if (fileATag.Link.ToLower().Contains("http"))
                                                {
                                                    obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                                                }
                                                else
                                                {
                                                    obj = ToolHtml.GetBaseAttach("http://www.gaxqjyzx.com" + fileATag.Link, headName, info.Id);
                                                }
                                            }
                                            catch { }
                                            if (obj != null)
                                            {
                                                ToolDb.SaveEntity(obj, string.Empty);
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    parser.Reset();
                                    NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                    if (imgNode != null && imgNode.Count > 0)
                                    {
                                        for (int k = 0; k < imgNode.Count; k++)
                                        {
                                            ImageTag   img = imgNode[0] as ImageTag;
                                            BaseAttach obj = null;
                                            try
                                            {
                                                if (img.ImageURL.ToLower().Contains("http"))
                                                {
                                                    obj = ToolHtml.GetBaseAttach(img.ImageURL, headName, info.Id);
                                                }
                                                else
                                                {
                                                    obj = ToolHtml.GetBaseAttach("http://www.gaxqjyzx.com" + img.ImageURL, headName, info.Id);
                                                }
                                            }
                                            catch { }
                                            if (obj != null)
                                            {
                                                ToolDb.SaveEntity(obj, string.Empty);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Crawls the notice listing under http://ztb.gxi.gov.cn/ztbgg/zbgs/ page by page,
        /// downloads the detail page of every listed notice and builds one
        /// <see cref="BidInfo"/> per notice whose detail body could be located.
        /// Attachment links found inside a notice body are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <returns>
        /// The notices collected so far. Collection stops as soon as the list holds
        /// <c>MaxCount</c> entries. The list is empty when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected List <BidInfo> AddZbgs()
        {
            string         url     = "http://ztb.gxi.gov.cn/ztbgg/zbgs/";
            List <BidInfo> list    = new List <BidInfo>();
            int            pageInt = 1;
            string         html    = string.Empty;

            // Half-width and full-width opening parenthesis. The manager name is cut at
            // whichever comes first (the earlier code tested the same literal twice, which
            // made the second test dead).
            char[] openingParens = { '(', '\uFF08' };

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(url, Encoding.Default);
            }
            catch { return(list); } // first listing page unavailable: nothing to crawl

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "pl")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().Replace("(", "kdxx").Replace(",", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(url + "index_" + i + ".htm", Encoding.Default);
                    }
                    catch { continue; } // skip a page that cannot be downloaded
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("id", "OutlineContent")), true), new TagNameFilter("table")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];

                    // Rows without a second cell or without a link (e.g. header or spacer rows)
                    // carry no notice; skip them instead of failing the whole crawl.
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           prjMgr = string.Empty;

                    // Never populated in this method; handed to GenBidInfo as empty strings.
                    string endDate = string.Empty, otherType = string.Empty, area = string.Empty;

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = url + aTag.Link.GetReplace("./");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; } // detail page unavailable: skip this notice

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "p1")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtlNode.AsHtml();
                    string bidCtx  = HtmlTxt.ToCtxString();

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoNormalTable")));
                    TableTag dtlTable  = (tableNode != null && tableNode.Count > 0) ? tableNode[0] as TableTag : null;
                    if (dtlTable != null)
                    {
                        // Flatten the table into "label:value" lines: even columns are written
                        // as labels, odd columns as the value that ends the line.
                        StringBuilder ctxBuilder = new StringBuilder();
                        for (int r = 0; r < dtlTable.RowCount; r++)
                        {
                            for (int c = 0; c < dtlTable.Rows[r].ColumnCount; c++)
                            {
                                string cell = dtlTable.Rows[r].Columns[c].ToNodePlainString();
                                ctxBuilder.Append(cell).Append(c % 2 == 0 ? ":" : "\r\n");
                            }
                        }
                        string ctx = ctxBuilder.ToString();

                        string projectName = ctx.GetRegex("项目名称,工程名称");
                        if (!string.IsNullOrWhiteSpace(projectName))
                        {
                            prjName = projectName;
                        }
                        code = ctx.GetCodeRegex().GetCodeDel();

                        bidUnit = ctx.GetBidRegex();
                        if (bidUnit.Contains("单位名称") || string.IsNullOrWhiteSpace(bidUnit))
                        {
                            bidUnit = ctx.GetRegex("单位名称");
                        }
                        bidMoney = ctx.GetMoneyRegex(null, false, "万元");

                        prjMgr    = ctx.GetMgrRegex();
                        buildUnit = ctx.GetBuildRegex();
                        if (bidUnit.IsNumber())
                        {
                            // A numeric "bid unit" is used as the amount when no amount was found,
                            // and the unit is looked up again under its other label.
                            if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = bidUnit;
                            }
                            bidUnit = ctx.GetRegex("单位名称");
                        }
                    }
                    else
                    {
                        // No usable detail table: fall back to the plain-text body.
                        bidUnit   = bidCtx.GetBidRegex();
                        bidMoney  = bidCtx.GetMoneyRegex();
                        prjMgr    = bidCtx.GetMgrRegex();
                        buildUnit = bidCtx.GetBuildRegex();
                        code      = bidCtx.GetCodeRegex().GetCodeDel();
                    }

                    // Trim trailing noise from the extracted names.
                    if (bidUnit.Contains("公司"))
                    {
                        bidUnit = bidUnit.Remove(bidUnit.LastIndexOf("公司")) + "公司";
                    }
                    if (bidUnit.Contains("确定为"))
                    {
                        bidUnit = bidUnit.Remove(0, bidUnit.IndexOf("确定为")).Replace("确定为", "");
                    }
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    int parenIndex = prjMgr.IndexOfAny(openingParens);
                    if (parenIndex >= 0)
                    {
                        prjMgr = prjMgr.Remove(parenIndex);
                    }
                    if (prjMgr.Contains("项目总工"))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("项目总工"));
                    }
                    if (prjMgr.Contains("注册"))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("注册"));
                    }

                    // Amounts above 100000 are divided by 10000; amounts below 1 become "0".
                    // Text that is not a number is left untouched.
                    decimal money;
                    if (decimal.TryParse(bidMoney, out money))
                    {
                        if (money > 100000)
                        {
                            money    = money / 10000;
                            bidMoney = money.ToString();
                        }
                        if (money < 1)
                        {
                            bidMoney = "0";
                        }
                    }

                    bidUnit = bidUnit.Replace(" ", "");
                    prjMgr  = prjMgr.Replace(" ", "");
                    if (bidUnit.Contains("中标价"))
                    {
                        bidUnit = "";
                    }

                    string specType = "政府采购";
                    string bidType  = prjName.GetInviteBidType();
                    string msgType  = "广西壮族自治区发展和改革委员会";
                    BidInfo info = ToolDb.GenBidInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the notice body.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://ztb.gxi.gov.cn/" + a.Link.GetReplace("../,./");

                            // Links longer than 500 bytes are not recorded.
                            if (Encoding.Default.GetByteCount(link) > 500)
                            {
                                continue;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c> (pages after the first are requested by
        /// posting the page number together with the previous page's view state), downloads
        /// the detail page of every listed notice and builds one <see cref="BidInfo"/> per
        /// notice whose detail body could be located. Attachment links found inside a notice
        /// body are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the list holds <c>MaxCount</c> entries;
        /// when <c>true</c>, every listing page is processed.
        /// </param>
        /// <returns>
        /// The collected <see cref="BidInfo"/> items; empty when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <BidInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string viewState = string.Empty;
            string cookiestr = string.Empty;

            // Tokens removed from table labels: half-width and full-width colon.
            // Presumably GetReplace takes a comma-separated token list, as its other call
            // sites in this method suggest - confirm against the helper.
            const string colonTokens = ":,\uFF1A";

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch { return(list); } // first listing page unavailable: nothing to crawl

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Request page i by posting the pager event with the current view state.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    }, new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; } // skip a page that cannot be downloaded
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];

                    // Rows lacking the title or date cell, or lacking a link, carry no notice;
                    // skip them instead of failing the whole crawl.
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    // Never populated in this method; handed to GenBidInfo as empty strings.
                    string endDate = string.Empty, otherType = string.Empty,
                           area = string.Empty, prjMgr = string.Empty;

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = "http://www.hbggzy.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; } // detail page unavailable: skip this notice

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtlNode.AsHtml();
                    string bidCtx  = HtmlTxt.GetReplace("</p>,<br />,<br/>", "\r\n").ToCtxString().GetReplace("\t", "\r\n");

                    // NOTE(review): the address is extracted but never passed on below; kept
                    // because the helper's behaviour is not visible here - confirm and remove.
                    string prjAddress = bidCtx.GetAddressRegex();
                    string buildUnit  = bidCtx.GetBuildRegex();
                    string bidUnit    = bidCtx.GetBidRegex();
                    string bidMoney   = bidCtx.GetMoneyRegex();

                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        // The plain text gave no bidder: rebuild "label:value" lines from the
                        // first table of the notice body and search those instead.
                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        TableTag tableTag  = (tableNode != null && tableNode.Count > 0) ? tableNode[0] as TableTag : null;
                        if (tableTag != null)
                        {
                            StringBuilder ctx = new StringBuilder();
                            for (int r = 0; r < tableTag.RowCount; r++)
                            {
                                for (int c = 0; c < tableTag.Rows[r].ColumnCount; c++)
                                {
                                    string temp = tableTag.Rows[r].Columns[c].ToNodePlainString();
                                    if (string.IsNullOrWhiteSpace(temp))
                                    {
                                        continue;
                                    }
                                    if (temp.Contains("中标人") || temp.Contains("中标单位"))
                                    {
                                        // Header row of a vertical layout: pair this cell and its
                                        // right-hand neighbour with the cells directly below them.
                                        try
                                        {
                                            // Each line is built completely before it is appended, so a
                                            // missing cell leaves the text untouched.
                                            ctx.Append(temp.GetReplace(colonTokens) + ":" + tableTag.Rows[r + 1].Columns[c].ToNodePlainString() + "\r\n");
                                            ctx.Append(tableTag.Rows[r].Columns[c + 1].ToNodePlainString().GetReplace(colonTokens) + ":" + tableTag.Rows[r + 1].Columns[c + 1].ToNodePlainString() + "\r\n");
                                        }
                                        catch { } // deliberate best effort: the row below or the neighbouring cell may not exist
                                        r++;      // the row below has been consumed as values
                                        break;
                                    }

                                    // Horizontal layout: odd-positioned cells are labels,
                                    // even-positioned cells end the line as values.
                                    if ((c + 1) % 2 == 0)
                                    {
                                        ctx.Append(temp.GetReplace(colonTokens) + "\r\n");
                                    }
                                    else
                                    {
                                        ctx.Append(temp.GetReplace(colonTokens) + ":");
                                    }
                                }
                            }
                            string ctxText = ctx.ToString();
                            bidUnit  = ctxText.GetBidRegex();
                            bidMoney = ctxText.GetMoneyRegex();
                        }
                    }

                    // Trim trailing noise from the extracted builder name.
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    if (buildUnit.Contains("联系"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                    }
                    if (buildUnit.Contains("指挥部"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("指挥部"));
                    }

                    string code = bidCtx.GetCodeRegex().GetCodeDel().GetReplace(".");

                    // A "bidder" containing any of these phrases is discarded.
                    if (bidUnit.Contains("日历天") || bidUnit.Contains("预期中标") || bidUnit.Contains("投标人") || bidUnit.Contains("中标价"))
                    {
                        bidUnit = string.Empty;
                    }

                    string msgType  = "湖北省公共资源交易中心";
                    string specType = "政府采购";
                    string bidType  = "水利工程";
                    buildUnit = buildUnit.Replace(" ", "");
                    BidInfo info = ToolDb.GenBidInfo("湖北省", "湖北省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the notice body.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // The base already ends with "/"; drop leading slashes from a
                            // relative link so the joined URL does not contain "//".
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://www.hbggzy.cn/" + a.Link.TrimStart('/');

                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }