Ejemplo n.º 1
0
        /// <summary>
        /// Crawls the paged bid listing at <c>SiteUrl</c> and builds a <c>BidInfo</c> for every
        /// listed entry whose detail page contains the <c>Center_Introduction</c> div.
        /// </summary>
        /// <remarks>
        /// Pages after the first are requested by posting the ASP.NET form state scraped from the
        /// most recently fetched page. Attachment links found in a detail page are appended to
        /// <c>base.AttachList</c>. Network failures are handled as best effort: a failed first
        /// request returns an empty list, a failed page or detail request skips that page or entry.
        /// </remarks>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the list holds <c>MaxCount</c> entries.
        /// </param>
        /// <returns>The collected <c>BidInfo</c> objects; never <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Best effort: without the first listing page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "PagingControl_lblPage")));
            if (pageNode != null && pageNode.Count > 0)
            {
                // Keep the default of a single page when the pager label is not a number.
                int parsedPages;
                if (int.TryParse(pageNode[0].ToNodePlainString(), out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Post back the form state of the last fetched page to request page i.
                    string viewState          = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation    = this.ToolWebSite.GetAspNetEventValidation(html);
                    string viewStateGenerator = ToolHtml.GetHtmlInputValue(html, "__VIEWSTATEGENERATOR");
                    NameValueCollection nvc   = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION",
                        "PagingControl$tbxpidex",
                        "PagingControl$btnGo"
                    }, new string[] {
                        "",
                        "",
                        viewState,
                        viewStateGenerator,
                        eventValidation,
                        i.ToString(),
                        "go"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip this page; html still holds the previous page for the next postback.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "right")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    // A missing title attribute used to throw and abort the whole crawl;
                    // fall back to the visible link text instead.
                    string title = aTag.GetAttribute("title");
                    if (title == null)
                    {
                        title = aTag.LinkText ?? string.Empty;
                    }

                    string prjName   = title.Replace(" ", "");
                    string beginDate = node.ToPlainTextString().GetDateRegex();
                    string infoUrl   = "http://www.wzgcjsx2.gx.cn/" + aTag.Link;
                    string endDate   = string.Empty;
                    string otherType = string.Empty;

                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this entry only.
                        continue;
                    }

                    NodeList dtlNode = new Parser(new Lexer(htmldtl)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "Center_Introduction")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt  = dtlNode.AsHtml();
                    string bidCtx   = htmlTxt.GetReplace("</p>,</br>", "\r\n").ToCtxString();
                    string bidUnit  = bidCtx.GetBidRegex();
                    string bidMoney = bidCtx.GetMoneyRegex(null, false, "万元");
                    string prjMgr   = bidCtx.GetMgrRegex();

                    // Text that the project code and build unit are extracted from: the plain
                    // detail text by default, or the flattened first table when the bid unit
                    // could not be found in the plain text.
                    string fieldSource = bidCtx;
                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        NodeList tableNode = new Parser(new Lexer(htmlTxt)).ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        TableTag table     = (tableNode != null && tableNode.Count > 0) ? tableNode[0] as TableTag : null;
                        if (table != null)
                        {
                            // Flatten the table into "cell:cell\r\n" text: even columns are
                            // followed by ':', odd columns by a line break.
                            StringBuilder flattened = new StringBuilder();
                            for (int r = 0; r < table.RowCount; r++)
                            {
                                for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                {
                                    flattened.Append(table.Rows[r].Columns[c].ToNodePlainString());
                                    flattened.Append(c % 2 == 0 ? ":" : "\r\n");
                                }
                            }

                            string ctx = flattened.ToString();
                            bidUnit = ctx.GetBidRegex();
                            if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = ctx.GetMoneyRegex(null, false, "万元");
                            }
                            if (string.IsNullOrWhiteSpace(prjMgr))
                            {
                                prjMgr = ctx.GetMgrRegex();
                            }
                            fieldSource = ctx;
                        }
                    }

                    string code      = fieldSource.GetCodeRegex().GetCodeDel();
                    string buildUnit = fieldSource.GetBuildRegex();

                    // Amounts above 10000 are divided by 10000; non-numeric text is left untouched.
                    decimal money;
                    if (decimal.TryParse(bidMoney, out money) && money > 10000)
                    {
                        bidMoney = (money / 10000).ToString();
                    }

                    string msgType  = "梧州市公共资源交易中心";
                    string specType = "建设工程";
                    string bidType  = prjName.GetInviteBidType();
                    BidInfo info = ToolDb.GenBidInfo("广西壮族自治区", "广西壮族自治区及地市", "梧州市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
                    list.Add(info);

                    // Register every attachment link found in the detail content.
                    NodeList aNode = new Parser(new Lexer(htmlTxt)).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://www.wzgcjsx2.gx.cn/" + a.Link.GetReplace("../,./");
                            if (Encoding.Default.GetByteCount(link) > 500)
                            {
                                // Links longer than 500 bytes are skipped.
                                continue;
                            }

                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c>, builds a <c>NotifyInfo</c> for each row whose
        /// detail page contains the <c>context_div</c> div and saves it through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <remarks>
        /// For every entity that <c>ToolDb.SaveEntity</c> reports as saved, the images and attachment
        /// links inside the detail content are saved as <c>BaseAttach</c> entities as well.
        /// Network failures are handled as best effort: a failed first request ends the crawl,
        /// a failed page or detail request skips that page or row.
        /// </remarks>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops once <c>MaxCount</c> entities have been processed.
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted directly instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Determine the page count.
            int    pageInt  = 1;
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Best effort: without the first listing page there is nothing to crawl.
                return null;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a")));
            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The last pager link carries the page count between parentheses; the
                    // parentheses are swapped for marker tokens before the value is cut out.
                    string temp = pageList[pageList.Count - 1].GetATagValue().Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    pageInt = Convert.ToInt32(temp);
                }
                catch
                {
                    // Unparseable pager: crawl the first page only.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Re-post the hidden form fields of the last fetched page with the new page number.
                        string typeId           = ToolHtml.GetHtmlInputValue(html, "typeId");
                        string boardId          = ToolHtml.GetHtmlInputValue(html, "boardId");
                        string totalRows        = ToolHtml.GetHtmlInputValue(html, "totalRows");
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "typeId", "boardId", "totalRows", "pageNO"
                        }, new string[] {
                            typeId, boardId, totalRows, i.ToString()
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                TableTag table    = (nodeList != null && nodeList.Count > 0) ? nodeList[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }

                // The first and the last table rows are not data rows and are skipped.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    // Stop before downloading another detail page once the limit is reached;
                    // nothing would be saved for it anyway.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return null;
                    }

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Not a data row (title is read from column 1, date from column 2).
                        continue;
                    }

                    string infoType    = "政策法规";
                    string infoScorce  = string.Empty;
                    string headName    = tr.Columns[1].ToNodePlainString();
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl     = tr.Columns[1].GetATagHref();

                    string htldtl;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this row only.
                        continue;
                    }

                    NodeList dtlList = new Parser(new Lexer(htldtl)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "context_div")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = dtlList.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = MsgTypeCosnt.ZhongShanMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "中山市区", string.Empty, infoCtx, infoType);

                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Save the images embedded in the detail content.
                    NodeList imgList = new Parser(new Lexer(ctxHtml)).ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgList != null)
                    {
                        for (int img = 0; img < imgList.Count; img++)
                        {
                            ImageTag imgTag = imgList[img] as ImageTag;
                            if (imgTag == null)
                            {
                                continue;
                            }

                            try
                            {
                                BaseAttach baseInfo = ToolHtml.GetBaseAttachByUrl(imgTag.GetAttribute("src"), headName, info.Id);
                                if (baseInfo != null)
                                {
                                    ToolDb.SaveEntity(baseInfo, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort, same as for link attachments below:
                                // one broken image must not abort the crawl.
                            }
                        }
                    }

                    // Save the attachment links found in the detail content.
                    NodeList attachList = new Parser(new Lexer(ctxHtml)).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (attachList != null)
                    {
                        for (int a = 0; a < attachList.Count; a++)
                        {
                            ATag aTag = attachList[a] as ATag;
                            if (aTag == null || !aTag.IsAtagAttach())
                            {
                                continue;
                            }

                            try
                            {
                                BaseAttach obj = ToolHtml.GetBaseAttachByUrl(aTag.Link, aTag.LinkText, info.Id);
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort: a failed attachment does not stop the crawl.
                            }
                        }
                    }
                }
            }

            return null;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c> and builds a <c>NoticeInfo</c> for each row
        /// whose detail page contains the <c>context_div</c> div.
        /// </summary>
        /// <remarks>
        /// The publish time is taken from the detail text (first in the "yyyy年MM月dd日" form, then
        /// via the default date pattern) and falls back to the date shown in the listing row.
        /// Network failures are handled as best effort: a failed first request returns an empty
        /// list, a failed page or detail request skips that page or row.
        /// </remarks>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the list holds <c>MaxCount</c> entries.
        /// </param>
        /// <returns>The collected <c>NoticeInfo</c> objects; never <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch (Exception)
            {
                // Best effort: without the first listing page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "toptd1")), true), new TagNameFilter("a")));
            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The last pager link has the form "javascript:goPage(N)"; N is the page count.
                    string temp = pageList[pageList.Count - 1].GetATagValue();
                    pageInt = Convert.ToInt32(temp.Replace("javascript:goPage(", "").Replace(")", ""));
                }
                catch
                {
                    // Unparseable pager: crawl the first page only.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Re-post the hidden form fields of the last fetched page with the new page number.
                        string typeId           = ToolHtml.GetHtmlInputValue(html, "typeId");
                        string boardId          = ToolHtml.GetHtmlInputValue(html, "boardId");
                        string totalRows        = ToolHtml.GetHtmlInputValue(html, "totalRows");
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                            new string[] { "typeId", "boardId", "newstitle", "sTime", "eTime", "totalRows", "pageNO" },
                            new string[] { typeId, boardId, "", "", "", totalRows, i.ToString() });
                        // NOTE(review): unlike the first request, this response is not passed
                        // through GetJsString() - confirm that is intentional.
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                TableTag table    = (nodeList != null && nodeList.Count > 0) ? nodeList[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }

                // The first and the last table rows are not data rows and are skipped.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Not a data row (title is read from column 1, date from column 2).
                        continue;
                    }

                    string prjCode   = string.Empty;
                    string buildUnit = string.Empty;
                    string infoTitle = tr.Columns[1].ToNodePlainString();
                    string endDate   = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoType  = "资格预审";
                    string infoUrl   = tr.Columns[1].GetATagHref();

                    string htldtl;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this row only.
                        continue;
                    }

                    NodeList dtlList = new Parser(new Lexer(htldtl)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "context_div")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    // Serialize the detail nodes once and derive the plain text from that result.
                    string htmlTxt = dtlList.ToHtml();
                    string infoCtx = htmlTxt.ToCtxString().Replace("<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />", "");

                    string publishTime = infoCtx.GetDateRegex("yyyy年MM月dd日").Replace("年", "-").Replace("月", "-").Replace("日", "");
                    if (string.IsNullOrEmpty(publishTime))
                    {
                        publishTime = infoCtx.GetDateRegex();
                    }
                    if (string.IsNullOrEmpty(publishTime))
                    {
                        // Last resort: the date shown in the listing row.
                        publishTime = endDate;
                    }

                    NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "惠州市区", string.Empty, string.Empty, infoTitle, infoType, infoCtx, publishTime, string.Empty, MsgTypeCosnt.HuiZhouMsgType, infoUrl, prjCode, buildUnit, string.Empty, string.Empty, string.Empty, string.Empty, htmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Crawls the listing of every city returned by <c>GetCitys()</c> and builds an
        /// <c>InviteInfo</c> for each listed entry whose detail page contains the
        /// <c>TDContent</c> cell.
        /// </summary>
        /// <remarks>
        /// Each city is crawled independently: a city whose first request fails is skipped and
        /// the remaining cities are still processed. Attachment links found in a detail page are
        /// appended to <c>base.AttachList</c>.
        /// </remarks>
        /// <param name="crawlAll">
        /// When <c>false</c>, at most <c>MaxCount</c> entries are collected per city.
        /// </param>
        /// <returns>The collected <c>InviteInfo</c> objects of all cities; never <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<InviteInfo>();

            Dictionary<string, string> citys = this.GetCitys();
            if (citys == null)
            {
                return list;
            }

            // Key is passed on as the area of the generated entries, value is the listing URL.
            foreach (KeyValuePair<string, string> city in citys)
            {
                this.CrawlCityInvites(city.Key, city.Value, crawlAll, list);
            }

            return list;
        }

        /// <summary>
        /// Crawls all listing pages of a single city and appends the generated
        /// <c>InviteInfo</c> objects to <paramref name="list"/>.
        /// </summary>
        /// <param name="area">Area value stored on every generated entry.</param>
        /// <param name="listUrl">URL of the city's first listing page.</param>
        /// <param name="crawlAll">When <c>false</c>, stops after <c>MaxCount</c> entries for this city.</param>
        /// <param name="list">Shared result list the entries are appended to.</param>
        private void CrawlCityInvites(string area, string listUrl, bool crawlAll, IList list)
        {
            int    count     = 0;
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(listUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Best effort: give up on this city only; the caller moves on to the next one.
                return;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text contains the page count between "总页数" and "当前页".
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    int parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Pager text could not be extracted: crawl the first page only.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Post back the form state of the last fetched page to request page i.
                    string viewState          = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation    = this.ToolWebSite.GetAspNetEventValidation(html);
                    string viewStateGenerator = ToolHtml.GetHtmlInputValue(html, "__VIEWSTATEGENERATOR");
                    NameValueCollection nvc   = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION",
                        "MoreInfoList1$txtTitle"
                    }, new string[] {
                        viewState,
                        viewStateGenerator,
                        "MoreInfoList1$Pager",
                        i.ToString(),
                        eventValidation,
                        ""
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(listUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        // Skip this page; html still holds the previous page for the next postback.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                TableTag table    = (listNode != null && listNode.Count > 0) ? listNode[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped; data rows start at index 1.
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr   = table.Rows[j];
                    ATag     aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // Row without a link cannot be followed to a detail page.
                        continue;
                    }

                    string endDate   = string.Empty;
                    string remark    = string.Empty;
                    string otherType = string.Empty;
                    string prjName   = aTag.GetAttribute("title").GetReplace("【正在报名】,【报名结束】");
                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl   = "http://www.gxzbtb.cn" + aTag.Link;

                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this row only.
                        continue;
                    }

                    NodeList dtlNode = new Parser(new Lexer(htmldtl)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt    = dtlNode.AsHtml();
                    string inviteCtx  = htmlTxt.ToCtxString();
                    string prjAddress = inviteCtx.GetAddressRegex().GetReplace(" ");
                    string buildUnit  = inviteCtx.GetBuildRegex().GetReplace(" ");
                    if (buildUnit.Contains("公司"))
                    {
                        // Cut everything after the first "公司".
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        // NOTE(review): this keeps the trailing "地址" marker in the value;
                        // confirm whether the marker itself was meant to be dropped.
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址")) + "地址";
                    }

                    string code       = inviteCtx.GetCodeRegex().GetCodeDel().GetReplace(" ");
                    string msgType    = "广西壮族自治区公共资源交易中心";
                    string specType   = "建设工程";
                    string inviteType = "水利水电";
                    buildUnit = buildUnit.Replace(" ", "");

                    InviteInfo info = ToolDb.GenInviteInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, htmlTxt);
                    list.Add(info);
                    count++;

                    // Register every attachment link found in the detail content.
                    NodeList aNode = new Parser(new Lexer(htmlTxt)).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://www.gxzbtb.cn/" + a.Link.GetReplace("../,./");
                            if (Encoding.Default.GetByteCount(link) > 500)
                            {
                                // Links longer than 500 bytes are skipped.
                                continue;
                            }

                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && count >= this.MaxCount)
                    {
                        // Per-city limit reached: done with this city.
                        return;
                    }
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, downloads the detail page of every listed
        /// row and builds one <c>NoticeInfo</c> for each detail page that has a content node.
        /// Attachment anchors found inside the content are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> notices have been collected.
        /// </param>
        /// <returns>
        /// The collected notices. The list is empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <NoticeInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the default of a single page when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the form fields read from the
                    // most recently loaded page, with the pager as event target and the page number as argument.
                    string viewState        = this.ToolWebSite.GetAspNetViewState(html);
                    string __CSRFTOKEN      = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__CSRFTOKEN",
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    },
                                                                                      new string[] {
                        __CSRFTOKEN,
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    });
                    try
                    {
                        cookiestr = cookiestr.GetReplace(new string[] { "path=/;", "HttpOnly", "," });
                        html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                // The matched node is only usable when it really is a table tag.
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string   InfoTitle = string.Empty, InfoType = string.Empty, PublistTime = string.Empty, InfoCtx = string.Empty, InfoUrl = string.Empty, htmlTxt = string.Empty;
                    TableRow tr = table.Rows[j];

                    // Columns 1 (link) and 2 (date) are read below; skip rows that do not have them
                    // or that carry no anchor instead of failing the whole crawl.
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    InfoTitle = aTag.GetAttribute("title");
                    // Use the anchor text when the title attribute is missing/empty or longer than
                    // 150 bytes in the default encoding.
                    if (string.IsNullOrEmpty(InfoTitle) || Encoding.Default.GetByteCount(InfoTitle) > 150)
                    {
                        InfoTitle = aTag.LinkText;
                    }
                    PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl     = "http://www.spprec.com" + aTag.Link;
                    InfoType    = "变更公告";

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        // NOTE(review): the parser above has already run one extraction; every other
                        // lookup in this method builds a new Parser first, so the fallback does the
                        // same to be sure it scans the whole document — confirm against the
                        // HtmlParser reset semantics.
                        parser  = new Parser(new Lexer(htmldtl));
                        dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ivs_content")));
                    }

                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        Logger.Error("无内容" + InfoUrl);
                        continue;
                    }

                    htmlTxt = dtlNode.AsHtml();
                    InfoCtx = htmlTxt.GetReplace("<br />,<br/>,<br>,</p>", "\r\n").ToCtxString();

                    NoticeInfo info = ToolDb.GenNoticeInfo("四川省", "四川省及地市", string.Empty, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "四川省公共资源交易中心", InfoUrl, string.Empty, string.Empty, string.Empty, string.Empty, "政府采购", string.Empty, htmlTxt);
                    list.Add(info);

                    // Collect attachment links from the notice body.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag tag = aNode[k] as ATag;
                            if (tag == null || !tag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that already contain "http" are kept as they are; anything else is
                            // treated as site-relative.
                            string link = tag.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0
                                ? tag.Link
                                : "http://www.spprec.com" + tag.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Crawls the paged bulletin list at <c>SiteUrl</c>, downloads the detail page of every listed
        /// row, extracts the bid fields from the page text and builds one <c>BidInfo</c> per detail
        /// page that has a body node. Attachment anchors found in the body are added to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <BidInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bulletininfotable_toolbarTable")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the default of a single page when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the list form; the totals are
                    // read back from the most recently loaded page.
                    string bulletininfotable_totalpages = ToolHtml.GetHtmlInputValue(html, "bulletininfotable_totalpages");
                    string bulletininfotable_totalrows  = ToolHtml.GetHtmlInputValue(html, "bulletininfotable_totalrows");
                    NameValueCollection nvc             = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "ec_i",
                        "bulletininfotable_efn",
                        "bulletininfotable_crd",
                        "bulletininfotable_p",
                        "bulletininfotable_s_bulletintitle",
                        "bulletininfotable_s_finishday",
                        "hySort",
                        "findAjaxZoneAtClient",
                        "method",
                        "bulletinclass",
                        "bulletininfotable_totalpages",
                        "bulletininfotable_totalrows",
                        "bulletininfotable_pg",
                        "bulletininfotable_rd"
                    },
                                                                                                  new string[] {
                        "bulletininfotable",
                        "",
                        "20",
                        i.ToString(),
                        "",
                        "",
                        "2",
                        "false",
                        "bulletinMore",
                        "01",
                        bulletininfotable_totalpages,
                        bulletininfotable_totalrows,
                        (i - 1).ToString(),
                        "5"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                // "tbody" is rewritten to "table" so the list body can be matched with a table filter.
                parser = new Parser(new Lexer(html.Replace("tbody", "table")));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bulletininfotable_table_body")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                // The matched node is only usable when it really is a table tag.
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string   prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;
                    TableRow tr = table.Rows[j];

                    // Columns 0 (name) and 1 (date) are read below; skip rows that do not have them.
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }

                    prjName   = tr.Columns[0].ToNodePlainString();
                    beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://www.ynggzy.net/bulletin.do?method=showbulletin&bulletin_id=" + tr.GetAttribute("id");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = ToolHtml.GetHtmlByUrl(this.SiteUrl, InfoUrl, Encoding.Default);
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    HtmlTxt    = dtlNode.AsHtml();
                    bidCtx     = HtmlTxt.ToLower().GetReplace("</p>,<br />,<br/>,<br>", "\r\n").ToCtxString();
                    buildUnit  = bidCtx.GetBuildRegex();
                    // NOTE(review): prjAddress is extracted but never passed to GenBidInfo below.
                    prjAddress = bidCtx.GetAddressRegex();
                    code       = bidCtx.GetCodeRegex();
                    bidType    = prjName.GetInviteBidType();
                    bidUnit    = bidCtx.GetBidRegex();
                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegex("成交人,成交供应商");
                    }
                    bidMoney = bidCtx.GetMoneyRegex();
                    prjMgr   = bidCtx.GetMgrRegex();

                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        // Second attempt: free-text phrases in the page text.
                        bidUnit = bidCtx.GetRegexBegEnd("确定中标供应商为", ",");
                        if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = bidCtx.GetRegexBegEnd("投标报价为", "万元");
                        }

                        if (string.IsNullOrWhiteSpace(bidUnit))
                        {
                            // Last attempt: flatten the first table of the page into "cell:cell" lines
                            // (odd cells followed by ":", even cells by a line break) and run the
                            // extraction helpers on that text.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList bidNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            TableTag tag     = (bidNode != null && bidNode.Count > 0) ? bidNode[0] as TableTag : null;
                            if (tag != null)
                            {
                                string ctx = string.Empty;
                                for (int r = 0; r < tag.RowCount; r++)
                                {
                                    for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = tag.Rows[r].Columns[c].ToNodePlainString();
                                        if ((c + 1) % 2 == 0)
                                        {
                                            ctx += temp + "\r\n";
                                        }
                                        else
                                        {
                                            ctx += temp + ":";
                                        }
                                    }
                                }
                                bidUnit = ctx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("入围供应商,成交人,单位名称");
                                }
                                if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                                {
                                    bidMoney = ctx.GetMoneyRegex();
                                }

                                if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                                {
                                    bidMoney = ctx.GetMoneyString().GetMoney();
                                }
                            }
                        }
                    }

                    // Trim trailing text that the extraction helpers may have captured after the unit name.
                    if (buildUnit.Contains("联系"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                    }
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    if (bidUnit.Contains("地址"))
                    {
                        bidUnit = bidUnit.Remove(bidUnit.IndexOf("地址"));
                    }
                    if (bidUnit.Contains("公司"))
                    {
                        bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                    }
                    bidUnit = bidUnit.GetReplace("第一,1");
                    // A value that still contains one of these words is discarded rather than stored.
                    if (bidUnit.Contains("综合") || bidUnit.Contains("报价") || bidUnit.Contains("联系") || bidUnit.Contains("投标单位") || bidUnit.Contains("得分") || bidUnit.Contains("中标价"))
                    {
                        bidUnit = string.Empty;
                    }

                    // Amounts above 100000 are divided by 10000 (presumably yuan -> 10k-yuan units;
                    // TODO confirm). A value that is not a number is left untouched.
                    decimal money;
                    if (decimal.TryParse(bidMoney, out money) && money > 100000)
                    {
                        bidMoney = (money / 10000).ToString();
                    }

                    specType = "建设工程";
                    msgType  = "云南省公共资源交易中心";
                    BidInfo info = ToolDb.GenBidInfo("云南省", "云南省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the page body.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // The anchor's link is expected to look like a call with two arguments
                            // (file id, file name); both are cut out and put into the download URL.
                            // NOTE(review): the first GetReplace on each line replaces a parenthesis
                            // with an identical one as written — possibly a full-width character was
                            // lost in transcription; verify against the original source.
                            string     obj    = a.Link.GetReplace("(", "(").GetRegexBegEnd("(", ",").GetReplace("(").GetReplace("'").Replace(",", "");
                            string     name   = a.Link.GetReplace(")", ")").GetRegexBegEnd(",", ")").GetReplace(")").GetReplace("'").Replace(",", "");
                            string     link   = "http://www.ynggzy.net/resource/bulletin.do?method=mdownloadFile&file_id=" + obj + "&file_name=" + name;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Crawls the paged bulletin list at <c>SiteUrl</c>, downloads the detail page of every listed
        /// row, extracts the invitation fields from the page text and builds one <c>InviteInfo</c>
        /// per detail page that has a body node. Attachment anchors found in the body are added to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <InviteInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bulletininfotable_toolbarTable")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the default of a single page when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the list form; the totals are
                    // read back from the most recently loaded page.
                    string bulletininfotable_totalpages = ToolHtml.GetHtmlInputValue(html, "bulletininfotable_totalpages");
                    string bulletininfotable_totalrows  = ToolHtml.GetHtmlInputValue(html, "bulletininfotable_totalrows");
                    NameValueCollection nvc             = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "ec_i",
                        "bulletininfotable_efn",
                        "bulletininfotable_crd",
                        "bulletininfotable_p",
                        "bulletininfotable_s_bulletintitle",
                        "bulletininfotable_s_finishday",
                        "hySort",
                        "findAjaxZoneAtClient",
                        "method",
                        "bulletinclass",
                        "bulletininfotable_totalpages",
                        "bulletininfotable_totalrows",
                        "bulletininfotable_pg",
                        "bulletininfotable_rd"
                    },
                                                                                                  new string[] {
                        "bulletininfotable",
                        "",
                        "20",
                        i.ToString(),
                        "",
                        "",
                        "1",
                        "false",
                        "bulletinMore",
                        "01",
                        bulletininfotable_totalpages,
                        bulletininfotable_totalrows,
                        (i - 1).ToString(),
                        "5"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                // "tbody" is rewritten to "table" so the list body can be matched with a table filter.
                parser = new Parser(new Lexer(html.Replace("tbody", "table")));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bulletininfotable_table_body")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                // The matched node is only usable when it really is a table tag.
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty,
                           city = string.Empty;

                    TableRow tr = table.Rows[j];

                    // Columns 0 (name) and 1 (date) are read below; skip rows that do not have them.
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }

                    prjName   = tr.Columns[0].ToNodePlainString();
                    beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://www.ynggzy.net/bulletin.do?method=showbulletin&bulletin_id=" + tr.GetAttribute("id");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = ToolHtml.GetHtmlByUrl(this.SiteUrl, InfoUrl, Encoding.Default);
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    HtmlTxt    = dtlNode.AsHtml();
                    inviteCtx  = HtmlTxt.ToLower().GetReplace("</p>,<br />,<br/>,<br>", "\r\n").ToCtxString();
                    buildUnit  = inviteCtx.GetBuildRegex();
                    prjAddress = inviteCtx.GetAddressRegex();
                    code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    inviteType = prjName.GetInviteBidType();
                    specType   = "政府采购";
                    msgType    = "云南省公共资源交易中心";
                    InviteInfo info = ToolDb.GenInviteInfo("云南省", "云南省及地市", city, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the page body.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that already contain "http" are kept as they are; anything else is
                            // prefixed with the site root.
                            string link = a.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0
                                ? a.Link
                                : "http://www.ynggzy.net/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 8
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string newUrl = "http://202.104.65.182:8081/G2/gfmweb/web-enterprise!list.do?data&filter_params_=enterpriseId,rowNum,enterpriseBaseId,enterpriseName,organizationCode&defined_operations_=&nocheck_operations_=&";

            string gridSearch           = "true";
            string nd                   = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
            string PAGESIZE             = "100";
            string PAGE                 = "1";
            string sortField            = "";
            string sortDirection        = "asc";
            string searchVal            = "1";
            string _enterpriseName_like = "公司";
            string entTypeCodes         = "";

            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection", "searchVal", "_enterpriseName_like", "entTypeCodes"
            }, new string[] {
                gridSearch, nd, PAGESIZE, PAGE, sortField, sortDirection, searchVal, _enterpriseName_like, entTypeCodes
            });

            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = ToolWeb.GetHtmlByUrl(newUrl, nvc, Encoding.UTF8);
            }
            catch { return(null); }

            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            string page = smsTypeJson["total"].ToString();

            pageInt = int.Parse(page);

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    PAGE = i.ToString();
                    nvc  = ToolWeb.GetNameValueCollection(new string[] {
                        "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection", "searchVal", "_enterpriseName_like", "entTypeCodes"
                    }, new string[] {
                        gridSearch, nd, PAGESIZE, PAGE, sortField, sortDirection, searchVal, _enterpriseName_like, entTypeCodes
                    });
                    try
                    {
                        html        = ToolWeb.GetHtmlByUrl(newUrl, nvc, Encoding.UTF8);
                        smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);
                    }
                    catch { continue; }
                }

                object[] objList = (object[])smsTypeJson["data"];

                foreach (object obj in objList)
                {
                    Dictionary <string, object> dic = obj as Dictionary <string, object>;

                    string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty,
                           RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty,
                           BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty,
                           Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty, ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, corpType = string.Empty,
                           qualCode = string.Empty, corpMgr = string.Empty, businessMgr = string.Empty, tecMgr = string.Empty;

                    CorpName = Convert.ToString(dic["enterpriseName"]);

                    CorpCode = Convert.ToString(dic["organizationCode"]);
                    string idCode       = Convert.ToString(dic["enterpriseBaseId"]);
                    string enterpriseId = Convert.ToString(dic["enterpriseId"]);
                    cUrl = "http://202.104.65.182:8081/G2/webdrive/web-enterprise!view.do?enterpriseId=" + enterpriseId;

                    //string infoUrl = "http://202.104.65.182:8081/G2/webdrive/web-enterprise-pub!getEnterpriseInfoById.do";
                    //string infoUrl2 = "http://202.104.65.182:8081/G2/webdrive/web-enterprise-pub!menuTree.do";
                    //Dictionary<string, object> dtlInfo = null, dtlInfo2 = null;
                    //string infoJson = string.Empty, infoJson2 = string.Empty;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = ToolWeb.GetHtmlByUrl(cUrl).GetJsString();
                        //NameValueCollection dtlNvc = ToolWeb.GetNameValueCollection(new string[] {
                        //"enterpriseId","menutype"
                        //}, new string[] { enterpriseId, "" });

                        //infoJson = ToolWeb.GetHtmlByUrl(infoUrl, dtlNvc, Encoding.UTF8);
                        //dtlInfo = (Dictionary<string, object>)serializer.DeserializeObject(infoJson);

                        //dtlNvc = ToolWeb.GetNameValueCollection(new string[] {
                        //"enterpriseId",
                        //"menutype",
                        //"actionFlag"
                        //}, new string[] {
                        //enterpriseId,"",""
                        //});

                        //infoJson2 = ToolWeb.GetHtmlByUrl(infoUrl2, dtlNvc, Encoding.UTF8);
                        //dtlInfo2 = (Dictionary<string, object>)serializer.DeserializeObject(infoJson2);
                    }
                    catch { continue; }

                    CorpAddress = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerAddress");
                    RegDate     = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerTime");
                    RegFund     = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseCapital");
                    if (!string.IsNullOrEmpty(RegFund))
                    {
                        RegFund += "万元";
                    }
                    BusinessCode = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseRegistrationCode");
                    CorpSite     = ToolHtml.GetHtmlInputValue(htmldtl, "_M.firmWebsite");

                    LinkMan   = ToolHtml.GetHtmlInputValue(htmldtl, "_M.name");
                    Email     = ToolHtml.GetHtmlInputValue(htmldtl, "_M.email");
                    LinkPhone = ToolHtml.GetHtmlInputValue(htmldtl, "_M.tel");
                    Fax       = ToolHtml.GetHtmlInputValue(htmldtl, "_M.fax");
                    corpMgr   = ToolHtml.GetHtmlInputValue(htmldtl, "_M.legalPersonName");

                    Parser   parser   = new Parser(new Lexer(htmldtl));
                    NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "g2-cell col-sm-6")));
                    if (typeNode != null && typeNode.Count > 0)
                    {
                        string str = string.Empty;
                        for (int j = 2; j < typeNode.Count; j++)
                        {
                            string semp = typeNode[j].ToNodePlainString();
                            if (!string.IsNullOrEmpty(semp))
                            {
                                try
                                {
                                    DateTime time = DateTime.Parse(semp);
                                    continue;
                                }
                                catch { }
                                str += semp + ",";
                            }
                        }
                        if (!string.IsNullOrEmpty(str))
                        {
                            corpType = str.Remove(str.Length - 1);
                        }
                    }


                    CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);

                    string exisSql = string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'", info.CorpName, info.CorpType, info.InfoSource);

                    string corpId = Convert.ToString(ToolDb.ExecuteScalar(exisSql));

                    if (!string.IsNullOrEmpty(corpId))
                    {
                        string delCorpQual = string.Format("delete from CorpQual where CorpId='{0}'", corpId);
                        string delCorpLeader = string.Format("delete from CorpLeader where CorpId='{0}'", corpId);
                        string delCorpSecLicStaff = string.Format("delete from CorpSecLicStaff where CorpId='{0}'", corpId);
                        int    qualCount = 0, leaderCount = 0, tecstaffCount = 0, infoCount = 0;
                        string corpSql = string.Format("delete from CorpInfo where Id='{0}'", corpId);
                        infoCount     = ToolDb.ExecuteSql(corpSql);
                        qualCount     = ToolDb.ExecuteSql(delCorpQual);
                        leaderCount   = ToolDb.ExecuteSql(delCorpLeader);
                        tecstaffCount = ToolDb.ExecuteSql(delCorpSecLicStaff);

                        if (infoCount > 0)
                        {
                            ToolDb.SaveEntity(info, "");
                        }
                        if (qualCount >= 0)
                        {
                            try
                            {
                                AddCorpQual(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                        }
                        if (leaderCount >= 0)
                        {
                            try
                            {
                                AddCorpLeader(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                        }
                        if (tecstaffCount >= 0)
                        {
                            try
                            {
                                AddCorpStaff(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                        }
                    }
                    else
                    {
                        if (ToolDb.SaveEntity(info, ""))
                        {
                            try
                            {
                                AddCorpLeader(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                            try
                            {
                                AddCorpQual(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                            try
                            {
                                AddCorpStaff(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                        }
                    }
                }
            }

            ToolCoreDb.ExecuteProcedure();
            return(null);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Crawls the paged announcement list at <c>SiteUrl</c>, downloads each linked detail page and
        /// converts every page that contains a <c>div#ivs_content</c> element into an <c>InviteInfo</c>.
        /// Attachment links found inside that element are appended to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <InviteInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string viewState = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The total page count is the text between these two markers of the pager cell.
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch { } // Best effort: keep the default of one page when the count cannot be read.
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are fetched by replaying the pager postback with the
                    // hidden fields taken from the previously downloaded page.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    string csrfToken        = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "__CSRFTOKEN", "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" },
                        new string[] { csrfToken, viewState, "MoreInfoList1$Pager", i.ToString() });
                    try
                    {
                        cookiestr = cookiestr.GetReplace(new string[] { "path=/;", "HttpOnly", "," });
                        html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                // "as" yields null when the matched node is not a TableTag; skip the page instead of
                // failing with a NullReferenceException.
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                           otherType = string.Empty, HtmlTxt = string.Empty;

                    TableRow tr = table.Rows[j];

                    // Columns 1 (link) and 2 (date) are read below; a shorter row (e.g. a header or
                    // pager row) would otherwise abort the whole crawl with an index error.
                    if (tr == null || tr.ColumnCount < 3)
                    {
                        continue;
                    }

                    // NOTE(review): assumes GetATag() returns null when the cell holds no link - confirm.
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    prjName   = aTag.GetAttribute("title");
                    beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://www.spprec.com" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ivs_content")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt   = dtlNode.AsHtml();
                    inviteCtx = HtmlTxt.GetReplace("<br />,<br/>,<br>,</p>", "\r\n").ToCtxString();
                    buildUnit = inviteCtx.GetBuildRegex();

                    // Cut the extracted unit name at the first trailing "contact"/"address" label.
                    if (buildUnit.Contains("联系"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }

                    prjAddress = inviteCtx.GetAddressRegex();
                    code       = inviteCtx.GetCodeRegex().GetCodeDel();

                    specType = inviteType = "政府采购";
                    msgType  = "四川省公共资源交易中心";
                    InviteInfo info = ToolDb.GenInviteInfo("四川省", "四川省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Register every attachment link found inside the announcement body.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag tag = aNode[k] as ATag;
                            if (tag == null || !tag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links without "http" are treated as site-relative.
                            string link = tag.Link.ToLower().Contains("http")
                                ? tag.Link
                                : "http://www.spprec.com" + tag.Link;

                            BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Crawls the paged announcement list at <c>SiteUrl</c>, downloads each linked detail page and
        /// converts every page that contains a <c>span#zygg_Text_23</c> element into an <c>InviteInfo</c>.
        /// Attachment links found inside that element are appended to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Without the first listing page there is nothing to parse; return the empty list
                // right away, as the sibling crawlers do, instead of parsing an empty document.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The total page count is the text between "1/" and "页" in the pager.
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("1/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { } // Best effort: keep the default of one page when the count cannot be read.
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are fetched by replaying the pager postback with the
                    // hidden fields taken from the previously downloaded page.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    string csrfToken = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");

                    // Read the generator value from the page like the other hidden fields; fall back
                    // to the previously hard-coded value when the page does not expose it.
                    string viewStateGenerator = ToolHtml.GetHtmlInputValue(html, "__VIEWSTATEGENERATOR");
                    if (string.IsNullOrEmpty(viewStateGenerator))
                    {
                        viewStateGenerator = "76D0A3AC";
                    }

                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__CSRFTOKEN",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION",
                        "MoreInfoList1$txtProjectName",
                        "MoreInfoList1$txtBiaoDuanName",
                        "MoreInfoList1$txtBiaoDuanNo",
                        "MoreInfoList1$txtJSDW",
                        "MoreInfoList1$StartDate",
                        "MoreInfoList1$EndDate",
                        "MoreInfoList1$jpdDi",
                        "MoreInfoList1$jpdXian"
                    }, new string[] {
                        csrfToken,
                        "MoreInfoList1$Pager",
                        i.ToString(),
                        "",
                        viewState,
                        viewStateGenerator,
                        eventValidation,
                        "", "", "", "", "", "",
                        "-1", "-1"
                    });
                    try
                    {
                        cookiestr = cookiestr.GetReplace("path=/; HttpOnly").Replace(",", "");
                        html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                // "as" yields null when the matched node is not a TableTag; skip the page instead of
                // failing with a NullReferenceException.
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                           otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

                    TableRow tr = table.Rows[j];

                    // Columns 1 (link) and 3 (date) are read below; a shorter row (e.g. a header or
                    // pager row) would otherwise abort the whole crawl with an index error.
                    if (tr == null || tr.ColumnCount < 4)
                    {
                        continue;
                    }

                    // NOTE(review): assumes GetATag() returns null when the cell holds no link - confirm.
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    // The detail address is taken from the onclick handler; without it there is no
                    // page to fetch for this row.
                    string onclick = aTag.GetAttribute("onclick");
                    if (string.IsNullOrEmpty(onclick))
                    {
                        continue;
                    }

                    prjName = aTag.GetAttribute("title").GetReplace(";");
                    area    = prjName.GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    if (!string.IsNullOrEmpty(area))
                    {
                        // The area was extracted after normalising "[...]" to "【...】", so the title
                        // may carry either bracket style; strip both so the tag never stays in the name.
                        prjName = prjName.GetReplace("[" + area + "]");
                        if (!string.IsNullOrEmpty(prjName))
                        {
                            prjName = prjName.Replace("【" + area + "】", string.Empty);
                        }
                    }
                    beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex();

                    // NOTE(review): as written, Replace("(", "(") maps the character to itself;
                    // presumably the first argument was meant to be a full-width parenthesis - confirm.
                    InfoUrl = "http://www.jszb.com.cn/jszb/YW_info/" + onclick.Replace("(", "(").GetRegexBegEnd("(", ",").GetReplace("\",../,./");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "zygg_Text_23")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt    = dtlNode.AsHtml();
                    inviteCtx  = HtmlTxt.ToCtxString();
                    prjAddress = inviteCtx.GetAddressRegex();
                    buildUnit  = inviteCtx.GetBuildRegex();

                    // Keep the unit name up to and including the first "公司", and drop anything
                    // from a trailing "地址" label onwards.
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }

                    code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    msgType    = "江苏省建设工程招标投标办公室";
                    specType   = "建设工程";
                    inviteType = "建设工程";
                    InviteInfo info = ToolDb.GenInviteInfo("江苏省", "江苏省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Register every attachment link found inside the announcement body.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links without "http" are treated as site-relative.
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://www.jszb.com.cn/" + a.Link;

                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Ejemplo n.º 11
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <BidInfo>();
            Dictionary <string, string> citys = this.GetCitys();

            foreach (string area in citys.Keys)
            {
                int    count           = 0;
                int    pageInt         = 1;
                string html            = string.Empty;
                string viewState       = string.Empty;
                string eventValidation = string.Empty;
                string cookiestr       = string.Empty;
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(citys[area], Encoding.UTF8, ref cookiestr);
                }
                catch { return(list); }
                Parser   parser   = new Parser(new Lexer(html));
                NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
                if (pageNode != null && pageNode.Count > 0)
                {
                    try
                    {
                        string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                        pageInt = int.Parse(temp);
                    }
                    catch { }
                }
                for (int i = 1; i <= pageInt; i++)
                {
                    if (i > 1)
                    {
                        viewState       = this.ToolWebSite.GetAspNetViewState(html);
                        eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                        string viewSTATEGENERATOR = ToolHtml.GetHtmlInputValue(html, "__VIEWSTATEGENERATOR");
                        NameValueCollection nvc   = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "__VIEWSTATE",
                            "__VIEWSTATEGENERATOR",
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__EVENTVALIDATION",
                            "MoreInfoList1$txtTitle"
                        },
                                                                                            new string[] {
                            viewState,
                            viewSTATEGENERATOR,
                            "MoreInfoList1$Pager",
                            i.ToString(),
                            eventValidation,
                            ""
                        });
                        try
                        {
                            html = this.ToolWebSite.GetHtmlByUrl(citys[area], nvc, Encoding.UTF8, ref cookiestr);
                        }
                        catch { continue; }
                    }
                    parser = new Parser(new Lexer(html));
                    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                    if (listNode != null && listNode.Count > 0)
                    {
                        TableTag table = listNode[0] as TableTag;
                        for (int j = 1; j < table.RowCount; j++)
                        {
                            string prjName = string.Empty,
                                   buildUnit = string.Empty, bidUnit = string.Empty,
                                   bidMoney = string.Empty, code = string.Empty,
                                   bidDate = string.Empty,
                                   beginDate = string.Empty,
                                   endDate = string.Empty, bidType = string.Empty,
                                   specType = string.Empty, InfoUrl = string.Empty,
                                   msgType = string.Empty, bidCtx = string.Empty,
                                   prjAddress = string.Empty, remark = string.Empty,
                                   prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                            TableRow tr   = table.Rows[j];
                            ATag     aTag = tr.Columns[1].GetATag();
                            prjName   = aTag.GetAttribute("title").GetReplace("【正在报名】,【报名结束】");
                            beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                            InfoUrl   = "http://www.gxzbtb.cn" + aTag.Link;
                            string htmldtl = string.Empty;
                            try
                            {
                                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                            }
                            catch { continue; }
                            parser = new Parser(new Lexer(htmldtl));
                            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                            if (dtlNode != null && dtlNode.Count > 0)
                            {
                                HtmlTxt    = dtlNode.AsHtml();
                                bidCtx     = HtmlTxt.GetReplace(new string[] { "<br/>", "<br />", "<br>" }, "\r\n").ToCtxString();
                                prjAddress = bidCtx.GetAddressRegex();
                                buildUnit  = bidCtx.GetBuildRegex();
                                bidUnit    = bidCtx.GetBidRegex();
                                bidMoney   = bidCtx.GetMoneyRegex();
                                prjMgr     = bidCtx.GetMgrRegex();
                                code       = bidCtx.GetCodeRegex().GetCodeDel();

                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    parser = new Parser(new Lexer(HtmlTxt));
                                    NodeList bidNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                                    if (bidNode != null && bidNode.Count > 0)
                                    {
                                        string   ctx      = string.Empty;
                                        TableTag bidTable = bidNode[0] as TableTag;
                                        for (int r = 0; r < bidTable.RowCount; r++)
                                        {
                                            for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                                            {
                                                if ((c + 1) % 2 == 0)
                                                {
                                                    ctx += bidTable.Rows[r].Columns[c].ToNodePlainString() + "\r\n";
                                                }
                                                else
                                                {
                                                    ctx += bidTable.Rows[r].Columns[c].ToNodePlainString() + ":";
                                                }
                                            }
                                        }

                                        bidUnit = ctx.GetBidRegex();
                                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                        {
                                            bidMoney = ctx.GetMoneyString().GetMoney("万元");
                                        }
                                        if (string.IsNullOrEmpty(prjAddress))
                                        {
                                            prjAddress = ctx.GetAddressRegex();
                                        }
                                        if (string.IsNullOrEmpty(buildUnit))
                                        {
                                            buildUnit = ctx.GetBuildRegex();
                                        }
                                        if (string.IsNullOrEmpty(code))
                                        {
                                            code = ctx.GetCodeRegex().GetCodeDel();
                                        }
                                        if (bidUnit.Contains("推荐") || bidUnit.Contains("中标") || bidUnit.Contains("地址"))
                                        {
                                            bidUnit = string.Empty;
                                        }
                                        if (string.IsNullOrEmpty(bidUnit))
                                        {
                                            if (bidTable.RowCount > 1)
                                            {
                                                ctx = string.Empty;
                                                for (int d = 0; d < bidTable.Rows[0].ColumnCount; d++)
                                                {
                                                    ctx += bidTable.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                    try
                                                    {
                                                        ctx += bidTable.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                    catch { }
                                                }
                                                bidUnit = ctx.GetBidRegex();
                                                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                                {
                                                    bidMoney = ctx.GetMoneyString().GetMoney();
                                                }
                                                if (string.IsNullOrEmpty(prjAddress))
                                                {
                                                    prjAddress = ctx.GetAddressRegex();
                                                }
                                                if (string.IsNullOrEmpty(buildUnit))
                                                {
                                                    buildUnit = ctx.GetBuildRegex();
                                                }
                                                if (string.IsNullOrEmpty(code))
                                                {
                                                    code = ctx.GetCodeRegex().GetCodeDel();
                                                }
                                            }
                                        }
                                    }
                                }
                                try
                                {
                                    if (decimal.Parse(bidMoney) > 10000)
                                    {
                                        bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                    }
                                }
                                catch { }
                                bidUnit = bidUnit.Replace("名称", "").Replace("单位", "").Replace("№", "").Replace("1", "").Replace("2", "").Replace("联合体", "").Replace("(", "");

                                if (bidUnit.Contains("公司"))
                                {
                                    bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                                }
                                if (bidUnit.Contains("研究院"))
                                {
                                    bidUnit = bidUnit.Remove(bidUnit.IndexOf("研究院")) + "研究院";
                                }
                                if (bidUnit.Contains("研究所"))
                                {
                                    bidUnit = bidUnit.Remove(bidUnit.IndexOf("研究所")) + "研究所";
                                }
                                bidType  = "房建市政";
                                specType = "建设工程";
                                msgType  = "广西壮族自治区公共资源交易中心";
                                BidInfo info = ToolDb.GenBidInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                count++;
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k] as ATag;
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.gxzbtb.cn/" + a.Link.GetReplace("../,./");
                                            }
                                            if (Encoding.Default.GetByteCount(link) > 500)
                                            {
                                                continue;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && count >= this.MaxCount)
                                {
                                    goto Funcs;
                                }
                            }
                        }
                    }
                }
                Funcs :;
            }
            return(list);
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Gets the value of an input element from page HTML.
 /// </summary>
 /// <param name="htl">HTML text handed unchanged to <c>ToolHtml.GetHtmlInputValue</c>.</param>
 /// <param name="inputId">Identifier of the input, handed unchanged to <c>ToolHtml.GetHtmlInputValue</c>.</param>
 /// <returns>Whatever <c>ToolHtml.GetHtmlInputValue</c> returns for the given arguments.</returns>
 public static string GetInputValue(this string htl, string inputId)
 {
     // Extension-method convenience wrapper; all the work is done by ToolHtml.
     string inputValue = ToolHtml.GetHtmlInputValue(htl, inputId);
     return inputValue;
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c>, opens each row's detail page and persists one
        /// <c>ItemInfo</c> per entry; when that save succeeds a matching <c>BaseProject</c> is saved too.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> entries have been handed to
        /// <c>ToolDb.SaveEntity</c>; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// The list created at the start of the method. Nothing is ever added to it, because entities are
        /// written directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Rows per listing page; used both for the page-count maths and in the paging POST.
            const int pageSize = 20;

            IList  list     = new List<ItemInfo>();
            string html     = string.Empty;
            int    pageInt  = 1;
            int    sqlCount = 0; // number of entries handed to SaveEntity so far

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "statusBar")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The status bar carries the total record count; round up to whole pages.
                    string temp = pageList.AsString().GetRegexBegEnd("找到", "条");
                    pageInt = (Convert.ToInt32(temp) + pageSize - 1) / pageSize;
                }
                catch { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 is already loaded; later pages are requested by posting the paging form.
                    string id = ToolHtml.GetHtmlInputValue(html, "id");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "ec_i",
                        "topicChrList_20070702_crd",
                        "topicChrList_20070702_f_a",
                        "topicChrList_20070702_p",
                        "topicChrList_20070702_s_name",
                        "topicChrList_20070702_s_topName",
                        "id",
                        "method",
                        "__ec_pages",
                        "topicChrList_20070702_rd",
                        "topicChrList_20070702_f_name",
                        "topicChrList_20070702_f_topName",
                        "topicChrList_20070702_f_ldate"
                    },
                    new string[] {
                        "topicChrList_20070702",
                        pageSize.ToString(),
                        "",
                        i.ToString(),
                        "",
                        "",
                        id,
                        "view",
                        i.ToString(),
                        pageSize.ToString(),
                        "", "", ""
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "topicChrList_20070702_table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Iteration starts at row index 3; rows 0-2 are skipped (presumably header/toolbar rows — confirm against the page).
                for (int j = 3; j < table.RowCount; j++)
                {
                    // Enforce the limit before spending a request on the detail page.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return list;
                    }

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 4)
                    {
                        // Columns 1 and 3 are read below; a shorter row cannot be a data row.
                        continue;
                    }

                    // Most of these stay empty; they exist so the positional GenItemInfo call below stays readable.
                    string itemCode = string.Empty, itemName = string.Empty, buildUnit = string.Empty, address = string.Empty,
                           investMent = string.Empty, buildKind = string.Empty, investKink = string.Empty, linkMan = string.Empty,
                           linkmanTel = string.Empty, itemDesc = string.Empty, apprNo = string.Empty, apprDate = string.Empty,
                           apprUnit = string.Empty, apprResult = string.Empty, landapprNo = string.Empty, landplanNo = string.Empty,
                           buildDate = string.Empty, infoSource = string.Empty, url = string.Empty,
                           textCode = string.Empty, licCode = string.Empty, msgType = string.Empty, ctxHtml = string.Empty;

                    string listName = tr.Columns[1].ToNodePlainString();
                    buildDate = tr.Columns[3].ToNodePlainString().GetDateRegex();
                    url       = "http://www.szzfcg.cn" + tr.Columns[1].GetATagHref();

                    string htlDtl;
                    try
                    {
                        htlDtl = this.ToolWebSite.GetHtmlByUrl(url, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htlDtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tab")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    TableTag dtlTable = dtlNode[0] as TableTag;
                    if (dtlTable == null)
                    {
                        continue;
                    }

                    ctxHtml    = dtlNode.AsHtml();
                    infoSource = ctxHtml.ToCtxString();

                    // Flatten the detail table into "label:value" lines: even-indexed cells are
                    // followed by ':', odd-indexed cells by a line break.
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int k = 0; k < dtlTable.RowCount; k++)
                    {
                        TableRow dtlRow = dtlTable.Rows[k];
                        for (int d = 0; d < dtlRow.ColumnCount; d++)
                        {
                            ctxBuilder.Append(dtlRow.Columns[d].ToNodePlainString());
                            ctxBuilder.Append(d % 2 == 1 ? "\r\n" : ":");
                        }
                    }
                    string ctx = ctxBuilder.ToString();

                    itemName = ctx.GetRegex("项目名称,工程名称,名称");
                    if (string.IsNullOrEmpty(itemName))
                    {
                        // Fall back to the title shown in the listing.
                        itemName = listName;
                    }

                    buildUnit  = ctx.GetRegex("采购人名称");
                    investMent = ctx.GetRegex("财政预算限额(元)").GetMoney();

                    msgType = "深圳政府采购";
                    ItemInfo info = ToolDb.GenItemInfo(itemCode, itemName, buildUnit, address, investMent, buildKind, investKink, linkMan, linkmanTel, itemDesc, apprNo, apprDate, apprUnit, apprResult, landapprNo, landplanNo, buildDate, "广东省", "深圳市区", infoSource, url, textCode, licCode, msgType, ctxHtml);

                    sqlCount++;
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        // Mirror the saved item as a project record, de-duplicated on its URL.
                        BaseProject prj = new BaseProject
                        {
                            Id         = ToolDb.NewGuid,
                            PrjCode    = info.ItemCode,
                            PrjName    = info.ItemName,
                            BuildUnit  = info.BuildUnit,
                            BuildTime  = info.BuildDate,
                            Createtime = info.CreateTime,
                            PrjAddress = info.Address,
                            InfoSource = info.InfoSource,
                            MsgType    = info.MsgType,
                            Province   = info.Province,
                            City       = info.City,
                            Url        = info.Url
                        };

                        ToolDb.SaveEntity(prj, "Url", this.ExistsUpdate);
                    }
                }
            }
            return list;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Crawls the paged notice grid at <c>SiteUrl</c>, downloads every row's detail page and builds a
        /// <c>NoticeInfo</c> for it. Attachment links found in the detail table are collected into
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the result list holds <c>MaxCount</c> notices;
        /// when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>The notices collected so far; an empty list when the first page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Value posted as __VIEWSTATEGENERATOR when paging.
            // NOTE(review): hard-coded; consider reading it from the page if the site ever changes it.
            const string viewStateGenerator = "76D0A3AC";

            IList  list      = new List<NoticeInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // Pager text contains "1/<total>页"; the part between the markers is the page count.
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("1/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager cannot be read
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 is already loaded; later pages are requested via an ASP.NET postback
                    // built from the hidden fields of the page currently held in html.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    string csrfToken       = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__CSRFTOKEN",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION",
                        "MoreInfoList1$txtProjectName",
                        "MoreInfoList1$txtBiaoDuanName",
                        "MoreInfoList1$txtBiaoDuanNo",
                        "MoreInfoList1$txtJSDW",
                        "MoreInfoList1$StartDate",
                        "MoreInfoList1$EndDate",
                        "MoreInfoList1$jpdDi",
                        "MoreInfoList1$jpdXian"
                    }, new string[] {
                        csrfToken,
                        "MoreInfoList1$Pager",
                        i.ToString(),
                        "",
                        viewState,
                        viewStateGenerator,
                        eventValidation,
                        "", "", "", "", "", "",
                        "-1", "-1"
                    });
                    try
                    {
                        // Strip cookie attributes and commas so the string can be sent back as a request cookie.
                        cookiestr = cookiestr.GetReplace("path=/; HttpOnly").Replace(",", "");
                        html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string InfoTitle = string.Empty, InfoType = string.Empty, PublistTime = string.Empty, InfoCtx = string.Empty, InfoUrl = string.Empty, prjCode = string.Empty, buildUnit = string.Empty, htmlTxt = string.Empty, area = string.Empty, bgType = string.Empty;
                    InfoType = "最高限价公示";

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 4)
                    {
                        // Columns 1-3 are read below; a shorter row cannot be a data row.
                        continue;
                    }

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // No link in the title cell, so there is no detail page to follow.
                        continue;
                    }

                    string onclick = aTag.GetAttribute("onclick");
                    if (onclick == null)
                    {
                        // The detail URL is extracted from the onclick handler; without it the row is unusable.
                        continue;
                    }

                    InfoTitle = aTag.GetAttribute("title").GetReplace(";");
                    // The area is the bracketed prefix of the title; brackets are normalised before extraction.
                    area = InfoTitle.GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    if (!string.IsNullOrEmpty(area))
                    {
                        InfoTitle = InfoTitle.GetReplace("[" + area + "]");
                    }
                    bgType      = tr.Columns[2].ToNodePlainString();
                    PublistTime = tr.Columns[3].ToPlainTextString().GetDateRegex();
                    InfoUrl     = "http://www.jszb.com.cn/jszb/YW_info/" + onclick.Replace("(", "(").GetRegexBegEnd("(", ",").GetReplace("\",../,./");

                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table1")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    TableTag tag = dtlNode[0] as TableTag;
                    if (tag == null)
                    {
                        continue;
                    }

                    htmlTxt = dtlNode.AsHtml();

                    // Flatten the detail table into "label:value" lines: even-indexed cells are labels
                    // (followed by ':'), odd-indexed cells are values (followed by a line break).
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int r = 0; r < tag.RowCount; r++)
                    {
                        TableRow dtlRow = tag.Rows[r];
                        for (int c = 0; c < dtlRow.ColumnCount; c++)
                        {
                            string temp = dtlRow.Columns[c].ToNodePlainString();
                            if (c % 2 == 1)
                            {
                                ctxBuilder.Append(temp).Append("\r\n");
                            }
                            else
                            {
                                ctxBuilder.Append(temp.GetReplace(":,:")).Append(":");
                            }
                        }
                    }
                    InfoCtx = ctxBuilder.ToString();

                    prjCode   = InfoCtx.GetCodeRegex();
                    buildUnit = InfoCtx.GetBuildRegex();
                    NoticeInfo info = ToolDb.GenNoticeInfo("江苏省", "江苏省及地市", area, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "江苏省建设工程招标投标办公室", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "建设工程", bgType, htmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail table.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null)
                            {
                                continue;
                            }
                            if (a.IsAtagAttach() || a.Link.ToLower().Contains("retrieveimagedata.aspx"))
                            {
                                string link;
                                if (a.Link.ToLower().Contains("http"))
                                {
                                    link = a.Link;
                                }
                                else
                                {
                                    // Relative links are resolved against the notice folder.
                                    link = "http://www.jszb.com.cn/jszb/YW_info/ZuiGaoXJ/" + a.Link;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Crawls the paged announcement list at <c>SiteUrl</c>, downloads each row's detail page, saves a
        /// <c>NotifyInfo</c> for it and, when that save succeeds, saves the attachments linked from the
        /// detail page.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops once <c>MaxCount</c> entries have been handed to
        /// <c>ToolDb.SaveEntity</c>; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// Always <c>null</c>: entities are persisted directly through <c>ToolDb.SaveEntity</c> instead of
        /// being returned.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Determine the number of pages.
            int    pageInt  = 1;
            int    sqlCount = 0; // number of entries handed to SaveEntity so far
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return null;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dataPager")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList.AsString().GetRegexBegEnd("共有:", "页");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 is already loaded; later pages are requested via an ASP.NET postback
                    // built from the hidden fields of the page currently held in html.
                    try
                    {
                        string viewState        = this.ToolWebSite.GetAspNetViewState(html);
                        string dataPager_input  = ToolHtml.GetHtmlInputValue(html, "dataPager_input");
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "searcher:txtKeyWord", "searcher:tcInputDateTime:txtDateTime1", "searcher:tcInputDateTime:txtDateTime2", "searcher:ddlProvince", "searcher:ddlCity1", "searcher:ddlCity2", "dataPager_input" }, new string[] {
                            "dataPager", i.ToString(), viewState, "", "", "", "-1", "-1", "-1", dataPager_input
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "p3")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                // When several tables carry class "p3", the second one is used; otherwise the only one.
                TableTag table = (nodeList.Count > 1 ? nodeList[1] : nodeList[0]) as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    // Enforce the limit before spending a request on the detail page.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return null;
                    }

                    string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 2)
                    {
                        // Columns 0 and 1 are read below; a shorter row cannot be a data row.
                        continue;
                    }

                    string onclick = tr.Columns[0].GetATagValue("onclick");
                    if (string.IsNullOrEmpty(onclick))
                    {
                        // The detail URL is taken from the onclick handler; without it there is nothing to fetch.
                        continue;
                    }

                    infoType    = "通知公告";
                    headName    = tr.Columns[0].ToNodePlainString();
                    releaseTime = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    // Parentheses are swapped for text markers before extracting the handler's argument.
                    infoUrl     = "http://www.sgjsj.gov.cn/sgwebims/" + onclick.Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk").Replace("\"", "");

                    string htldtl;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table4")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    ctxHtml = dtlList.AsHtml();
                    infoCtx = ctxHtml.ToCtxString();
                    msgType = MsgTypeCosnt.ShaoGuanMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "韶关市区", string.Empty, infoCtx, infoType);

                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        // Attachments are only stored for entries that were actually saved.
                        continue;
                    }

                    // Attachments live in the table(s) with id "Table1": the second match when there is
                    // more than one, otherwise the whole match.
                    parser = new Parser(new Lexer(htldtl));
                    NodeList tabNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table1")));
                    if (tabNode == null || tabNode.Count == 0)
                    {
                        continue;
                    }

                    string attachHtml = tabNode.Count > 1 ? tabNode[1].ToHtml() : tabNode.AsHtml();
                    parser = new Parser(new Lexer(attachHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null)
                    {
                        continue;
                    }

                    for (int n = 0; n < aNode.Count; n++)
                    {
                        ATag aTag = aNode[n] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }
                        try
                        {
                            BaseAttach obj = ToolHtml.GetBaseAttach("http://www.sgjsj.gov.cn/sgwebims/" + aTag.Link.Replace("../", "").Replace("./", ""), aTag.LinkText, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                        catch { } // best effort: one failed attachment must not abort the crawl
                    }
                }
            }
            return null;
        }