Exemplo n.º 1
0
        /// <summary>
        /// Downloads the JSON notice list from <c>SiteUrl + MaxCount</c>, fetches the detail page of
        /// every entry and builds one <c>NoticeInfo</c> per entry. Attachment links found in each
        /// detail page are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> notices have been collected.
        /// </param>
        /// <returns>
        /// The collected notices; <c>null</c> when the list request itself fails; an empty list when
        /// the response contains no JSON object.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List<NoticeInfo>();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }

            if (string.IsNullOrEmpty(html))
            {
                return(list);
            }

            // Keep only the outermost {...} span of the response before deserializing it.
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");
            if (startIndex < 0 || endIndex < startIndex)
            {
                // No JSON object in the response: nothing to crawl (Substring would throw here).
                return(list);
            }
            html = html.Substring(startIndex, (endIndex + 1) - startIndex);

            JavaScriptSerializer serializer = new JavaScriptSerializer();
            Dictionary<string, object> smsTypeJson = serializer.DeserializeObject(html) as Dictionary<string, object>;
            if (smsTypeJson == null)
            {
                return(list);
            }

            foreach (KeyValuePair<string, object> obj in smsTypeJson)
            {
                // "total" is skipped; every other key is expected to hold the array of notice records.
                if (obj.Key == "total")
                {
                    continue;
                }
                object[] array = obj.Value as object[];
                if (array == null)
                {
                    continue;
                }

                foreach (object arrValue in array)
                {
                    Dictionary<string, object> dic = arrValue as Dictionary<string, object>;
                    if (dic == null)
                    {
                        continue;
                    }

                    string bgType      = string.Empty;
                    string htmlTxt     = string.Empty;
                    string InfoTitle   = Convert.ToString(dic["ggName"]);
                    string prjCode     = Convert.ToString(dic["bdBH"]);
                    string InfoType    = "控制价公示";
                    string PublistTime = Convert.ToString(dic["fbStartTime2"]);
                    string prjType     = Convert.ToString(dic["gcLeiXing2"]);
                    string InfoUrl     = Convert.ToString(dic["detailUrl"]);

                    try
                    {
                        // The detail content is served by a separate endpoint that takes the same
                        // query string as the public detail URL.
                        Uri    uri = new Uri(InfoUrl);
                        string url = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryOldOTDataDetail.do" + uri.Query;

                        htmlTxt = this.ToolWebSite.GetHtmlByUrl(url);
                        htmlTxt = htmlTxt.GetReplace("\"");
                    }
                    catch { continue; } // skip entries whose detail page cannot be fetched

                    string InfoCtx   = htmlTxt.GetReplace("<br />", "\r\n").GetReplace("</tr>", "\r\n").ToCtxString();
                    string buildUnit = InfoCtx.GetBuildRegex();
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = InfoCtx.GetRegex("标底审核单位");
                    }

                    string     infoSource = "深圳市建设工程交易服务中心龙岗分中心";
                    NoticeInfo info       = ToolDb.GenNoticeInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, infoSource, InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, prjType, bgType, htmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail page.
                    Parser   parser   = new Parser(new Lexer(htmlTxt));
                    NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNode != null && fileNode.Count > 0)
                    {
                        for (int f = 0; f < fileNode.Count; f++)
                        {
                            ATag tag = fileNode[f] as ATag;
                            if (tag == null)
                            {
                                continue;
                            }
                            if (tag.IsAtagAttach() || tag.Link.ToLower().Contains("downloadfile"))
                            {
                                try
                                {
                                    string link = string.Empty;
                                    if (tag.Link.ToLower().Contains("http"))
                                    {
                                        link = tag.Link;
                                        if (link.EndsWith("//"))
                                        {
                                            link = link.Remove(link.LastIndexOf("//"));
                                        }
                                        // Strip escaping residue (backslashes and quotes) from the href.
                                        link = link.GetReplace("\\", "");
                                        link = link.GetReplace("\"", "");
                                    }
                                    else
                                    {
                                        link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                                    }
                                    BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                                    base.AttachList.Add(attach);
                                }
                                catch { continue; } // a bad attachment must not abort the notice
                            }
                        }
                    }

                    // Checked only after the attachments were collected, so the last notice returned
                    // does not lose its attachments when the limit is reached.
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Walks the paginated project-approval list, reads the table with id <c>mytable</c> on each
        /// page, fetches every row's detail page and builds one <c>ItemPlan</c> per row whose detail
        /// page contains a <c>div#appendixDiv</c>. Images found in that div are queued in
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>The collected items; <c>null</c> when the first list page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 27; // page count is hard-coded; the pager is not parsed

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page N (N > 1) lives at index_{N-1}.shtml.
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.hbfgw.gov.cn/hqfw/xmgg/xmkzgg/index_" + (i - 1).ToString() + ".shtml");
                    }
                    catch { continue; }
                }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "mytable")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (presumably the header row — confirm against the page).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;
                    TableRow tr = table.Rows[j];
                    ItemCode = tr.Columns[0].ToNodePlainString().GetReplace("('无')").GetReplace("('", "kdxx").GetReplace("')", "xxdk").GetRegexBegEnd("kdxx", "xxdk");

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // A row without a link has no detail page; skip it instead of failing the crawl.
                        continue;
                    }
                    ItemName     = aTag.LinkText;
                    ApprovalUnit = tr.Columns[2].ToNodePlainString();
                    PlanDate     = tr.Columns[3].ToPlainTextString().GetDateRegex();
                    InfoUrl      = "http://www.hbfgw.gov.cn/hqfw/xmgg/xmkzgg/" + aTag.Link.GetReplace("../,./");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "appendixDiv")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    // Prefer the detail page's <h1> text over the list link text when it is non-empty.
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList hNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("h1"));
                    if (hNode != null && hNode.Count > 0)
                    {
                        string temp = hNode[0].ToNodePlainString();
                        ItemName = string.IsNullOrEmpty(temp) ? ItemName : temp;
                    }
                    ItemName = ItemName.GetReplace("省发改委批复,省发改委核准");
                    CtxHtml  = dtlNode.AsHtml().Replace("none", "block");
                    ItemCtx  = CtxHtml.ToCtxString();

                    // Relative image sources are resolved against the detail page's directory.
                    string       imgUrl = InfoUrl.Substring(0, InfoUrl.LastIndexOf("/"));
                    List<string> attach = new List<string>();
                    parser = new Parser(new Lexer(CtxHtml));
                    NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgNode != null && imgNode.Count > 0)
                    {
                        for (int p = 0; p < imgNode.Count; p++)
                        {
                            ImageTag img = imgNode[p] as ImageTag;
                            if (img == null || string.IsNullOrEmpty(img.ImageURL))
                            {
                                continue;
                            }
                            if (img.ImageURL.ToLower().StartsWith("http"))
                            {
                                // Already absolute: prefixing the page directory would break the URL.
                                attach.Add(img.ImageURL);
                                continue;
                            }
                            string link = imgUrl + "/" + img.ImageURL.GetReplace("../,./");
                            CtxHtml = CtxHtml.GetReplace(img.ImageURL, link);
                            attach.Add(link);
                        }
                    }
                    PlanType = "项目核准信息";
                    MsgType  = "湖北省发展和改革委员会";

                    ItemPlan info = ToolDb.GenItemPlan("湖北省", "湖北省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                    list.Add(info);

                    foreach (string attachUrl in attach)
                    {
                        BaseAttach entity = ToolDb.GenBaseAttach(ItemName, info.Id, attachUrl);
                        base.AttachList.Add(entity);
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Crawls the paginated tender list at <c>SiteUrl</c>, fetches each row's detail page and builds
        /// one <c>InviteInfo</c> per row whose detail page contains a <c>td#TDContent</c>. Attachment
        /// links inside that cell are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> entries have been collected.
        /// </param>
        /// <returns>The collected entries; an empty list when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8).Replace("&nbsp;", "");
            }
            catch
            {
                return(list);
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("noWrap", "true")));
            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "总页数:" and "当" in the pager cell.
                    string temp = sNode.AsString().GetRegexBegEnd("总页数:", "当");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: fall back to a single page when the pager cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // NOTE(review): unlike the first request, follow-up pages are fetched without an
                        // explicit encoding and without stripping "&nbsp;" — confirm this is intended.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?Paging=" + i);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("valign", "top")));
                if (sNode == null || sNode.Count == 0)
                {
                    continue;
                }
                TableTag table = sNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The last row of the table is deliberately not visited (loop bound is RowCount - 1).
                for (int t = 0; t < table.RowCount - 1; t++)
                {
                    TableRow tr   = table.Rows[t];
                    ATag     aTag = tr.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    prjName   = aTag.GetAttribute("title");
                    beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://jyzx.maoming.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        HtmlTxt   = dtlNode.AsHtml();
                        inviteCtx = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();

                        buildUnit  = inviteCtx.GetBuildRegex();
                        prjAddress = inviteCtx.GetAddressRegex();
                        code       = inviteCtx.GetCodeRegex();
                        msgType    = "茂名市公共资源交易网";
                        specType   = "建设工程";
                        inviteType = prjName.GetInviteBidType();
                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "茂名市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);

                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k] as ATag;
                                if (a == null || !a.IsAtagAttach())
                                {
                                    continue;
                                }
                                string link = a.Link.Contains("http")
                                    ? a.Link
                                    : "http://jyzx.maoming.gov.cn/" + a.Link;
                                // Store the resolved URL; the raw href may be relative to the site root.
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                        list.Add(info);
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Crawls the paginated policy/regulation list at <c>SiteUrl</c>, fetches each row's detail page,
        /// builds a <c>NotifyInfo</c> and saves it directly through <c>ToolDb.SaveEntity</c>. When an
        /// entry is saved, the attachment links of its detail page are downloaded and saved as well.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops once <c>MaxCount</c> save attempts have been made.
        /// </param>
        /// <returns>Always <c>null</c>: entities are persisted here instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Determine the number of list pages.
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "28")));
            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The page count is the text between ",共" and "页" in the pager cell.
                    string temp = pageList.AsString().GetRegexBegEnd(",共", "页");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; } // pager not parseable: crawl the first page only
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&pageNum=" + i.ToString(), Encoding.Default).GetJsString();
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "95%")));
                if (nodeList == null || nodeList.Count <= 1)
                {
                    continue;
                }
                // The second table with width="95%" is the one whose rows are read.
                TableTag table = nodeList[1] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string headName = string.Empty, releaseTime = string.Empty, infoSource = string.Empty, msgType = string.Empty,
                           infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

                    TableRow tr = table.Rows[j];
                    infoType    = "政策法规";
                    headName    = tr.Columns[0].ToNodePlainString();
                    releaseTime = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    // The detail path is taken from the link's onclick attribute, between ",'" and "',".
                    infoUrl     = "http://market.meizhou.gov.cn" + tr.Columns[0].GetATagValue("onclick").GetRegexBegEnd(",'", "',");

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    // With several centered tables the second one is used; otherwise the whole match.
                    ctxHtml = dtlList.Count > 1 ? dtlList[1].ToHtml() : dtlList.ToHtml();
                    infoCtx = ctxHtml.ToCtxString().Replace("&gt;", "");
                    msgType = MsgTypeCosnt.MeiZhouMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoSource, msgType, infoUrl, ctxHtml, "广东省", "梅州市区", string.Empty, infoCtx, infoType);

                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    sqlCount++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        // Attachments are only fetched when SaveEntity reports success.
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }
                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag aTag = aNode[a] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }
                        try
                        {
                            // Only prefix the host for relative links; an absolute href would otherwise
                            // become "http://market.meizhou.gov.cnhttp://...".
                            string link = aTag.Link.ToLower().Contains("http")
                                ? aTag.Link
                                : "http://market.meizhou.gov.cn" + aTag.Link;
                            BaseAttach baseInfo = ToolHtml.GetBaseAttach(link, aTag.LinkText, info.Id);
                            if (baseInfo != null)
                            {
                                ToolDb.SaveEntity(baseInfo, string.Empty);
                            }
                        }
                        catch { } // best effort: a failed attachment must not abort the crawl
                    }
                }
            }
            return(null);
        }
Exemplo n.º 5
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "0h120")), true), new TagNameFilter("a")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode[sNode.Count - 2].GetATagValue("title");
                    pageInt = Convert.ToInt32(temp.GetReplace("第,页"));
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "0h120")));
                if (viewList != null && viewList.Count > 0)
                {
                    for (int j = 0; j < viewList.Count; j++)
                    {
                        TableTag table = viewList[j] as TableTag;
                        string   prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;
                        ATag     aTag = viewList[j].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        prjName   = aTag.GetAttribute("title");
                        beginDate = table.ToNodePlainString().GetDateRegex();
                        InfoUrl   = "http://renshan.huidong.gov.cn/" + aTag.Link;
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "fontzoom")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt = dtl.AsHtml();

                            if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果"))
                            {
                                string buildUnit = string.Empty, bidUnit = string.Empty,
                                       bidMoney = string.Empty, code = string.Empty,
                                       bidDate = string.Empty,
                                       endDate = string.Empty, bidType = string.Empty,
                                       specType = string.Empty,
                                       msgType = string.Empty, bidCtx = string.Empty,
                                       prjAddress = string.Empty, remark = string.Empty,
                                       prjMgr = string.Empty, otherType = string.Empty;
                                bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();


                                code      = bidCtx.GetCodeRegex().GetCodeDel();
                                buildUnit = bidCtx.GetBuildRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                bidUnit = bidCtx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.GetRegex("中标候选公司,中标候选人");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                try
                                {
                                    if (decimal.Parse(bidMoney) > 100000)
                                    {
                                        bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                    }
                                }
                                catch { }
                                Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                                NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                string   src       = string.Empty;
                                if (imgNode != null && imgNode.Count > 0)
                                {
                                    string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                    src     = "http://renshan.huidong.gov.cn/" + imgUrl;
                                    HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                                }
                                msgType  = "惠东县稔山镇人民政府";
                                specType = "政府采购";
                                bidType  = prjName.GetInviteBidType();
                                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                if (!string.IsNullOrEmpty(src))
                                {
                                    string sql = string.Format("select Id from BidInfo where InfoUrl='{0}'", info.InfoUrl);
                                    object obj = ToolDb.ExecuteScalar(sql);
                                    if (obj == null || obj.ToString() == "")
                                    {
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "");
                                            }
                                        }
                                        catch { }
                                    }
                                }
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://renshan.huidong.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                            else
                            {
                                string code = string.Empty, buildUnit = string.Empty,
                                       prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                       specType = string.Empty, endDate = string.Empty,
                                       remark = string.Empty, inviteCon = string.Empty,
                                       CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty;

                                inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();


                                inviteType = prjName.GetInviteBidType();

                                code       = inviteCtx.GetCodeRegex().GetCodeDel();
                                buildUnit  = inviteCtx.GetBuildRegex();
                                prjAddress = inviteCtx.GetAddressRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                Parser   imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
                                NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                                string   src       = string.Empty;
                                if (imgNode != null && imgNode.Count > 0)
                                {
                                    string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                                    src     = "http://renshan.huidong.gov.cn/" + imgUrl;
                                    HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
                                }
                                msgType = "惠东县稔山镇人民政府";

                                specType = "政府采购";

                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!string.IsNullOrEmpty(src))
                                {
                                    string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", info.InfoUrl);
                                    object obj = ToolDb.ExecuteScalar(sql);
                                    if (obj == null || obj.ToString() == "")
                                    {
                                        try
                                        {
                                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "");
                                            }
                                        }
                                        catch { }
                                    }
                                }
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://renshan.huidong.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Fetches the listing at <c>SiteUrl + MaxCount</c>, builds one <c>BidSituation</c> for
        /// every entry of the JSON "rows" array, saves it with <c>ToolDb.SaveEntity</c> and, when
        /// that call returns true, downloads and saves the record's attachments.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as <c>MaxCount</c> rows have been counted.
        /// </param>
        /// <returns>
        /// <c>null</c> when the listing cannot be fetched or contains no <c>{...}</c> JSON object;
        /// otherwise the result list, which this method never adds to because every record is
        /// saved directly.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List <BidSituation>();
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }

            if (string.IsNullOrEmpty(html))
            {
                return(null);
            }

            // Keep only the text from the first '{' to the last '}' before deserializing.
            // Without both braces Substring would throw, so treat that like a failed fetch.
            int startIndex = html.IndexOf('{');
            int endIndex   = html.LastIndexOf('}');
            if (startIndex < 0 || endIndex < startIndex)
            {
                return(null);
            }

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            // A response without a "rows" array has nothing to crawl.
            object   rowsValue;
            object[] objvalues = smsTypeJson.TryGetValue("rows", out rowsValue) ? rowsValue as object[] : null;
            if (objvalues == null)
            {
                return(list);
            }

            foreach (object objValue in objvalues)
            {
                Dictionary <string, object> dic = (Dictionary <string, object>)objValue;
                string code = string.Empty, prjName = string.Empty, PublicityEndDate = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty, beginDate = string.Empty;
                code      = Convert.ToString(dic["bdBH"]);
                prjName   = Convert.ToString(dic["bdName"]);
                beginDate = Convert.ToString(dic["faBuTime2"]);
                string idt = Convert.ToString(dic["bdGuid"]);
                InfoUrl = Convert.ToString(dic["detailUrl"]);
                string attachJson = string.Empty;
                try
                {
                    string urll = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=5&id=" + idt;
                    HtmlTxt = this.ToolWebSite.GetHtmlByUrl(urll).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        // The first detail endpoint returned nothing: fall back to the
                        // kbJiLu view page and its JSON detail endpoint.
                        string kdGuid = Convert.ToString(dic["kbJiLuGuid"]);
                        InfoUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=" + kdGuid;
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl);
                        string url = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/querykbJiLuDetail.do?ggGuid=&bdGuid=&kbJiLuGuid=" + kdGuid;
                        attachJson = this.ToolWebSite.GetHtmlByUrl(url);
                    }
                }
                catch { continue; }   // detail could not be loaded: skip this row

                string gcLeixing = string.Empty, jywTime = string.Empty, surl = string.Empty,
                       attachFileGroupGuid = string.Empty;

                if (!string.IsNullOrWhiteSpace(attachJson))
                {
                    JavaScriptSerializer        newSerializer = new JavaScriptSerializer();
                    Dictionary <string, object> newTypeJson   = (Dictionary <string, object>)newSerializer.DeserializeObject(attachJson);
                    Dictionary <string, object> kdInfo        = (Dictionary <string, object>)newTypeJson["kbJiLu"];

                    // Optional field: when absent the record simply has no attachment group.
                    object groupGuidValue;
                    if (kdInfo.TryGetValue("attachFileGroupGuid", out groupGuidValue))
                    {
                        attachFileGroupGuid = Convert.ToString(groupGuidValue);
                    }
                    gcLeixing = Convert.ToString(kdInfo["gcLeiXing"]);
                    jywTime   = Convert.ToString(kdInfo["jywFaBuEndTime"]);
                    // Example: https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=9cb75eb8-66b6-441c-9686-471dfa357ff5
                    // NOTE(review): the query parameter is named kbJiLuGuid but the value passed is
                    // attachFileGroupGuid - confirm this is intended (behavior kept as-is).
                    surl       = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/kbJiLu_View.do?kbJiLuGuid=" + attachFileGroupGuid;
                    attachJson = this.ToolWebSite.GetHtmlByUrl(surl);

                    HtmlTxt = attachJson;
                    Parser   parserNew = new Parser(new Lexer(HtmlTxt));
                    NodeList tableNode = parserNew.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "de_tab1")));
                    if (tableNode != null && tableNode.Count > 0)
                    {
                        // Fill the empty cells of the detail table with the values taken from JSON.
                        HtmlTxt = tableNode.AsHtml();
                        HtmlTxt = HtmlTxt.GetReplace("<td id=\"bdBH\">&nbsp;</td>", "<td id=\"bdBH\">&nbsp;" + code + "</td>");
                        HtmlTxt = HtmlTxt.GetReplace("<td id=\"bdName\">&nbsp;</td>", "<td  id=\"bdName\">&nbsp;" + prjName + "</td>");
                        HtmlTxt = HtmlTxt.GetReplace("<td id=\"gcLeiXing\">&nbsp;</td>", "<td id=\"gcLeiXing\">&nbsp;" + gcLeixing + "</td>");
                        HtmlTxt = HtmlTxt.GetReplace("<td id=\"jieZhiTime\">&nbsp;</td>", "<td id=\"jieZhiTime\">&nbsp;" + jywTime + "</td>");
                        ctx     = HtmlTxt.Replace("</tr>", "\r\n").ToCtxString();
                    }
                }

                // Only fall back to the raw conversion when no table text was produced above;
                // previously this assignment always ran and discarded the per-row line breaks.
                if (string.IsNullOrEmpty(ctx))
                {
                    ctx = HtmlTxt.ToCtxString();
                }

                msgType = "深圳市建设工程交易中心宝安分中心";
                BidSituation info = ToolDb.GetBidSituation("广东省", "深圳宝安区工程", "宝安区", code, prjName, PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);
                sqlCount++;
                // NOTE(review): the row that reaches MaxCount is returned on before it is saved -
                // confirm whether that is intended.
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return(list);
                }
                if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                {
                    if (!string.IsNullOrWhiteSpace(attachFileGroupGuid))
                    {
                        // Attachments are listed by a JSON endpoint keyed on the file-group id.
                        string moJson = string.Empty;
                        string sUrl   = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + attachFileGroupGuid;
                        try
                        {
                            moJson = this.ToolWebSite.GetHtmlByUrl(sUrl);
                        }
                        catch { }   // best effort: no listing means no attachments
                        if (!string.IsNullOrWhiteSpace(moJson))
                        {
                            JavaScriptSerializer        newSerializers = new JavaScriptSerializer();
                            Dictionary <string, object> newTypeJsons   = (Dictionary <string, object>)newSerializers.DeserializeObject(moJson);
                            object[] objs = (object[])newTypeJsons["rows"];
                            foreach (object objAttach in objs)
                            {
                                Dictionary <string, object> attachs = (Dictionary <string, object>)objAttach;
                                string     attachguid = Convert.ToString(attachs["attachGuid"]);
                                string     attachName = Convert.ToString(attachs["attachName"]);
                                string     link       = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachguid;
                                BaseAttach attach     = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                                if (attach != null)
                                {
                                    ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                }
                            }
                        }
                    }
                    else
                    {
                        // No file group: treat every <a> in the detail HTML as a candidate attachment.
                        Parser   parser   = new Parser(new Lexer(HtmlTxt));
                        NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (fileNode != null && fileNode.Count > 0)
                        {
                            for (int f = 0; f < fileNode.Count; f++)
                            {
                                ATag tag = fileNode[f] as ATag;

                                try
                                {
                                    BaseAttach attach = null;
                                    string     link   = string.Empty;
                                    if (tag.Link.ToLower().Contains("http"))
                                    {
                                        link = tag.Link;
                                        if (link.Contains("\\"))
                                        {
                                            link = link.Replace("\\", "");
                                        }
                                    }
                                    else
                                    {
                                        link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                                    }
                                    attach = ToolHtml.GetBaseAttach(link, tag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");

                                    if (attach != null)
                                    {
                                        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                    }
                                }
                                catch { continue; }   // one bad link must not stop the others
                            }
                        }
                    }
                }
            }
            return(list);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Walks every page of the listing at <c>SiteUrl</c> (pages after the first are requested
        /// by posting the ASP.NET form fields back), opens each row's detail page and turns its
        /// "DetailTable" into an <c>InviteInfo</c>. Attachment links found in the detail HTML are
        /// queued in <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the list holds <c>MaxCount</c> items.
        /// </param>
        /// <returns>The collected items; an empty list when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results    = new List <InviteInfo>();
            int    totalPages = 1;
            string pageHtml   = string.Empty;
            string cookie     = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default, ref cookie);
            }
            catch
            {
                return(results);
            }

            // The page count is read from <span id="lblPageCount">; on any failure keep 1.
            Parser   pageParser    = new Parser(new Lexer(pageHtml));
            NodeList pageCountSpan = pageParser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lblPageCount")));
            if (pageCountSpan != null && pageCountSpan.Count > 0)
            {
                try
                {
                    totalPages = int.Parse(pageCountSpan[0].ToNodePlainString());
                }
                catch { }
            }

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    // Post the form back using the state taken from the previously loaded page.
                    string   viewState       = this.ToolWebSite.GetAspNetViewState(pageHtml);
                    string   eventValidation = this.ToolWebSite.GetAspNetEventValidation(pageHtml);
                    string[] fieldNames      = new string[]
                    {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__EVENTVALIDATION",
                        "textfield",
                        "textfield",
                        "select",
                        "SearchName",
                        "SearchNo",
                        "txtSqlText",
                        "checkPage"
                    };
                    string[] fieldValues = new string[]
                    {
                        "Linkbutton3",
                        "",
                        viewState,
                        eventValidation,
                        "",
                        "",
                        // pre-encoded value for "select" (looks like URL-encoded GB2312 text - TODO confirm)
                        "%A1%AA%A1%AA%C6%F3%D2%B5%A1%AA%A1%AA",
                        "",
                        "",
                        " FProjectName like ''%%'' and FTNO like ''%%''",
                        (page - 1).ToString()
                    };
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                    try
                    {
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.Default);
                    }
                    catch { continue; }
                }

                Parser   listParser = new Parser(new Lexer(pageHtml));
                NodeList gridNodes  = listParser.ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgData")));
                if (gridNodes == null || gridNodes.Count <= 0)
                {
                    continue;
                }

                TableTag grid = gridNodes[0] as TableTag;
                // Iteration starts at row 1, so row 0 of the grid is never read.
                for (int rowIndex = 1; rowIndex < grid.RowCount; rowIndex++)
                {
                    TableRow row     = grid.Rows[rowIndex];
                    string   prjName = row.Columns[0].ToNodePlainString();
                    string   city    = row.Columns[1].ToNodePlainString();
                    string   endDate = row.Columns[2].ToPlainTextString().GetDateRegex();
                    string   InfoUrl = "http://www1.cqjsxx.com/webcqjg/GcxxFolder/" + row.Columns[0].GetATagHref();

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    Parser   detailParser = new Parser(new Lexer(detailHtml));
                    NodeList detailNodes  = detailParser.ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "DetailTable")));
                    if (detailNodes == null || detailNodes.Count <= 0)
                    {
                        continue;
                    }

                    string   HtmlTxt     = detailNodes.AsHtml();
                    TableTag detailTable = detailNodes[0] as TableTag;

                    // Flatten the table to text: even-indexed cells are followed by ":",
                    // odd-indexed cells by a line break.
                    StringBuilder text = new StringBuilder();
                    for (int r = 0; r < detailTable.RowCount; r++)
                    {
                        for (int c = 0; c < detailTable.Rows[r].ColumnCount; c++)
                        {
                            text.Append(detailTable.Rows[r].Columns[c].ToPlainTextString().ToNodeString());
                            text.Append(c % 2 == 1 ? "\r\n" : ":");
                        }
                    }
                    string inviteCtx = text.ToString();

                    // Begin date: for each label try the parsed date first, then the raw text;
                    // today's date is the last resort.
                    string beginDate = string.Empty;
                    foreach (string label in new string[] { "备案日期", "开始日期" })
                    {
                        if (string.IsNullOrEmpty(beginDate))
                        {
                            beginDate = inviteCtx.GetRegex(label).GetDateRegex();
                        }
                        if (string.IsNullOrEmpty(beginDate))
                        {
                            beginDate = inviteCtx.GetRegex(label);
                        }
                    }
                    if (string.IsNullOrEmpty(beginDate))
                    {
                        beginDate = DateTime.Now.ToString("yyyy-MM-dd");
                    }

                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string code       = inviteCtx.GetCodeRegex();
                    string specType   = "建设工程";
                    string inviteType = prjName.GetInviteBidType();
                    string msgType    = "重庆市工程建设招标投标交易中心";
                    string remark     = string.Empty;
                    string otherType  = string.Empty;

                    InviteInfo info = ToolDb.GenInviteInfo("重庆市", "重庆市及区县", city, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    results.Add(info);

                    // Queue every attachment-like link found in the detail table.
                    Parser   linkParser = new Parser(new Lexer(HtmlTxt));
                    NodeList anchors    = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int k = 0; k < anchors.Count; k++)
                        {
                            ATag anchor = anchors[k] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }
                            string link = anchor.Link.ToLower().Contains("http")
                                ? anchor.Link
                                : "http://www1.cqjsxx.com/" + anchor.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return(results);
                    }
                }
            }
            return(results);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Walks every page of the listing at <c>SiteUrl</c>, loads each row's detail page, builds
        /// a <c>NotifyInfo</c> from its "newsinfo" block and saves it with <c>ToolDb.SaveEntity</c>.
        /// When that call returns true, the images and attachment links inside the block are
        /// downloaded and saved as well.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as <c>MaxCount</c> items have been counted.
        /// </param>
        /// <returns>Always <c>null</c>; records are saved directly instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Get the page count.
            int    totalPages   = 1;
            int    handledCount = 0;
            string pageHtml     = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(null);
            }

            Parser   pagerParser = new Parser(new Lexer(pageHtml));
            NodeList pagerLinks  = pagerParser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "PageDataList__ctl7_LinkButton1")));
            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                try
                {
                    // The number is taken from between "共" and "页" in the pager text.
                    string pagerText = pagerLinks.AsString();
                    totalPages = Convert.ToInt32(pagerText.GetRegexBegEnd("共", "页"));
                }
                catch { totalPages = 1; }
            }

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        // Post the form back using the view state of the previously loaded page.
                        string   viewState  = this.ToolWebSite.GetAspNetViewState(pageHtml);
                        string[] fieldNames = new string[]
                        {
                            "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "head1:username",
                            "head1:Password", "head1:rbLoginType", "Tb_keyword", "ddlNewsType", "ddlistaddnewsdate"
                        };
                        string[] fieldValues = new string[]
                        {
                            "PageDataList$_ctl" + (page + 1).ToString() + "$LinkButton1", "", viewState, "", "", "unit", "", "20", ""
                        };
                        NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                Parser   listParser = new Parser(new Lexer(pageHtml));
                NodeList listTables = listParser.ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", " tb_list")));
                if (listTables == null || listTables.Count <= 0)
                {
                    continue;
                }

                TableTag listTable = listTables[0] as TableTag;
                for (int rowIndex = 0; rowIndex < listTable.RowCount; rowIndex++)
                {
                    TableRow row         = listTable.Rows[rowIndex];
                    string   infoType    = "通知公告";
                    string   releaseTime = row.Columns[2].ToPlainTextString().GetDateRegex();
                    string   headName    = row.Columns[1].ToNodePlainString();
                    string   infoUrl     = "http://www.szpark.com.cn" + row.Columns[1].GetATagHref();

                    // A failed detail download leaves the HTML empty, so no block is found below.
                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = ToolHtml.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { }

                    Parser   detailParser = new Parser(new Lexer(detailHtml));
                    NodeList newsBlocks   = detailParser.ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "newsinfo")));
                    if (newsBlocks == null || newsBlocks.Count <= 0)
                    {
                        continue;
                    }

                    string ctxHtml = newsBlocks.AsHtml();
                    ctxHtml = ctxHtml.Replace("<br/>", "\r\n");
                    ctxHtml = ctxHtml.Replace("<BR/>", "");

                    // Plain-text clean-up, step by step: drop spaces and &nbsp;, collapse tabs,
                    // strip any remaining tags, then collapse repeated blank lines.
                    string infoCtx = ctxHtml.ToCtxString();
                    infoCtx = infoCtx.Replace(" ", "");
                    infoCtx = infoCtx.Replace("&nbsp;", "");
                    for (int pass = 0; pass < 2; pass++)
                    {
                        infoCtx = infoCtx.Replace("\t\t", "\t");
                    }
                    infoCtx = Regex.Replace(infoCtx, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
                    infoCtx = infoCtx.Replace(" ", "");
                    infoCtx = infoCtx.Replace("\t", "");
                    for (int pass = 0; pass < 3; pass++)
                    {
                        infoCtx = infoCtx.Replace("\r\n\r\n", "\r\n");
                    }

                    string msgType    = MsgTypeCosnt.ShenZhenFJYLMsgType;
                    string infoSource = string.Empty;   // nothing on the page fills the source field

                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoSource, msgType, infoUrl, ctxHtml, "广东省", "深圳市工程", string.Empty, infoCtx, infoType);
                    handledCount++;
                    if (!crawlAll && handledCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Download the images embedded in the body; .gif sources are skipped.
                    Parser   imageParser = new Parser(new Lexer(ctxHtml));
                    NodeList images      = imageParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (images != null && images.Count > 0)
                    {
                        for (int m = 0; m < images.Count; m++)
                        {
                            try
                            {
                                ImageTag image = images[m] as ImageTag;
                                string   src   = image.GetAttribute("src");
                                if (src.ToLower().Contains(".gif"))
                                {
                                    continue;
                                }
                                string imageUrl = src.Contains("http")
                                    ? src
                                    : "http://www.szpark.com.cn" + src.Replace("../", "/").Replace("./", "/");
                                BaseAttach imageAttach = ToolHtml.GetBaseAttach(imageUrl, headName, info.Id);
                                if (imageAttach != null)
                                {
                                    ToolDb.SaveEntity(imageAttach, string.Empty);
                                }
                            }
                            catch { }
                        }
                    }

                    // Download every link that IsAtagAttach accepts as an attachment.
                    Parser   linkParser = new Parser(new Lexer(ctxHtml));
                    NodeList anchors    = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int a = 0; a < anchors.Count; a++)
                        {
                            ATag anchor = anchors[a] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }
                            try
                            {
                                string href    = anchor.GetATagHref();
                                string fileUrl = href.Contains("http")
                                    ? href
                                    : "http://www.szpark.com.cn" + href.Replace("../", "/").Replace("./", "/");
                                BaseAttach fileAttach = ToolHtml.GetBaseAttach(fileUrl, anchor.LinkText, info.Id);
                                if (fileAttach != null)
                                {
                                    ToolDb.SaveEntity(fileAttach, string.Empty);
                                }
                            }
                            catch { }
                        }
                    }
                }
            }
            return(null);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Crawls tender (invite) announcements from the JSON list endpoint at
        /// <c>SiteUrl + MaxCount</c>, downloads the detail HTML of every row and persists the
        /// resulting <c>InviteInfo</c> records (plus their downloadable attachments) through
        /// <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been generated.
        /// </param>
        /// <returns>
        /// <c>null</c> when the list request throws; otherwise the local list, which stays empty
        /// because records are saved as they are built instead of being collected.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Argument passed to GetReplace after every detail download
            // (presumably a comma-separated list of tokens to strip -- confirm against GetReplace).
            const string StripTokens = "\\t,\\r,\\n,\"";

            IList list = new List<InviteInfo>();
            int sqlCount = 0;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return null; }

            if (string.IsNullOrEmpty(html))
            {
                return list;
            }

            // Cut the outermost {...} out of the response. Without a well-formed pair of braces
            // Substring would throw, so treat that case as "nothing to crawl".
            int startIndex = html.IndexOf("{");
            int endIndex = html.LastIndexOf("}");
            if (startIndex < 0 || endIndex < startIndex)
            {
                return list;
            }

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer serializer = new JavaScriptSerializer();
            Dictionary<string, object> smsTypeJson = serializer.DeserializeObject(html) as Dictionary<string, object>;
            if (smsTypeJson == null)
            {
                return list;
            }

            // "rows" may be missing or not an array; either way there is nothing to iterate.
            object rowsValue;
            smsTypeJson.TryGetValue("rows", out rowsValue);
            object[] objvalues = rowsValue as object[];
            if (objvalues == null)
            {
                return list;
            }

            foreach (object objValue in objvalues)
            {
                Dictionary<string, object> dic = (Dictionary<string, object>)objValue;
                string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty,
                       inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty,
                       beginDate = string.Empty, endDate = string.Empty, remark = string.Empty,
                       InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                code = Convert.ToString(dic["gcBH"]);
                prjName = Convert.ToString(dic["gcName"]);
                beginDate = Convert.ToString(dic["ggStartTime2"]).GetDateRegex();

                // The end time is converted via Convert.ToInt64; if that fails the end date stays empty.
                string end = Convert.ToString(dic["ggEndTime"]);
                try
                {
                    endDate = ToolHtml.GetDateTimeByLong(Convert.ToInt64(end)).ToString();
                }
                catch { }

                inviteType = Convert.ToString(dic["gcLeiXing2"]);
                InfoUrl = Convert.ToString(dic["detailUrl"]);

                try
                {
                    string oldDetailUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=1&id=" + dic["gcGuid"];

                    // First choice: the "old data" detail endpoint. A failure here is tolerated
                    // because the announcement endpoint below is tried next.
                    try
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(oldDetailUrl).GetJsString().GetReplace(StripTokens);
                    }
                    catch { }

                    // Only hit the announcement endpoint when the first one produced nothing;
                    // a successful first download is no longer fetched a second time.
                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        string noticeUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/showGongGao.do?ggGuid=" + dic["ggGuid"];
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(noticeUrl).GetJsString().GetReplace(StripTokens);
                    }

                    HtmlTxt = HtmlTxt.GetReplace("},{,maoDian:,html:");

                    // Last chance: retry the old endpoint once more if we still have no content.
                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(oldDetailUrl).GetJsString().GetReplace(StripTokens);
                    }
                }
                catch
                {
                    // No detail page could be retrieved for this row; skip it.
                    continue;
                }

                inviteCtx = HtmlTxt.Replace("</span>", "\r\n").Replace("<br />", "\r\n").Replace("<BR>", "\r\n").Replace("<br/>", "\r\n").ToCtxString();

                prjAddress = inviteCtx.GetAddressRegex();
                buildUnit = inviteCtx.GetBuildRegex();
                if (string.IsNullOrEmpty(code))
                {
                    code = inviteCtx.GetCodeRegex();
                }
                msgType = "深圳市建设工程交易中心宝安分中心";
                specType = "建设工程";

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                sqlCount++;

                // Attachments are only downloaded when the record itself was saved.
                if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                {
                    Parser parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Only links containing "http" are downloaded; others are ignored.
                            if (!a.Link.ToLower().Contains("http"))
                            {
                                continue;
                            }

                            string link = a.Link.Replace("\\", "");
                            BaseAttach attach = null;
                            try
                            {
                                attach = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            }
                            catch { }
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "");
                            }
                        }
                    }
                }

                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return list;
                }
            }
            return list;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Crawls the paged "BcwjInfoList1" grid at <c>SiteUrl</c>, follows every row's link to
        /// its detail page and builds one <c>NoticeInfo</c> (type "补充通知") per detail page that
        /// contains a <c>span#spnShow</c> element. Attachment links found in that span are queued
        /// on <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl returns as soon as <c>MaxCount</c> notices have been collected.
        /// </param>
        /// <returns>The notices collected so far (possibly empty).</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<NoticeInfo>();
            int pageInt = 1;
            string html = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch { }   // html stays empty; the parsing below then simply finds no pager and no grid

            Parser parser = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "BcwjInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                // The page count is the text between "1/" and "页" in the pager; keep 1 on any failure.
                try
                {
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("1/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting back the view state of the previous page.
                    string viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "BcwjInfoList1:KeyWord",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    }, new string[] {
                        viewState,
                        "",
                        "BcwjInfoList1:Pager",
                        i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "BcwjInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string InfoTitle = string.Empty, InfoType = string.Empty, PublistTime = string.Empty, InfoCtx = string.Empty,
                           InfoUrl = string.Empty, prjCode = string.Empty, buildUnit = string.Empty, htmlTxt = string.Empty;
                    InfoType = "补充通知";

                    // Columns 1 and 2 are read below; rows that are too short or carry no link
                    // are skipped instead of aborting the whole crawl with an exception.
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    InfoTitle = aTag.GetAttribute("title").GetReplace(";");
                    prjCode = tr.Columns[1].ToNodePlainString().GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl = aTag.Link;
                    if (!InfoUrl.Contains("http"))
                    {
                        continue;
                    }

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "spnShow")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    htmlTxt = dtlNode.AsHtml();
                    InfoCtx = htmlTxt.GetReplace("</p>,<br />,<br/>", "\r\n").ToCtxString();
                    NoticeInfo info = ToolDb.GenNoticeInfo("浙江省", "浙江省及地市", "", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "浙江省公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "政府采购", "建设工程", htmlTxt);
                    list.Add(info);

                    // Queue every attachment link found in the detail content.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links without "http" are prefixed with the download host.
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://downc.zmctc.com/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Crawls the paged news list at <c>SiteUrl</c> (pages 2..n via <c>&amp;PageNo=</c>), loads
        /// each item's detail page and turns its <c>div.newscontent</c> into either a
        /// <c>BidInfo</c> (title mentions 中标 / 成交 / 结果 / 候选人公示) or an <c>InviteInfo</c>
        /// (everything else). Attachment links are queued on <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The mixed list of bid and invite records collected so far (possibly empty).</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();
            int pageInt = 1;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return list;
            }

            Parser parser = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pageBtn")));

            if (pageNode != null && pageNode.Count > 0)
            {
                // The page count is the text between "共" and "页"; keep 1 when it is not a number.
                string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("共", "页");
                int parsedPages;
                if (int.TryParse(temp, out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&PageNo=" + i, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news")), true), new TagNameFilter("li")));
                if (viewList == null || viewList.Count < 1)
                {
                    continue;
                }

                for (int j = 0; j < viewList.Count; j++)
                {
                    ATag aTag = viewList[j].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    // A missing title attribute yields null; fall back to an empty name so the
                    // string checks below cannot throw.
                    string prjName = aTag.GetAttribute("title") ?? string.Empty;
                    string beginDate = viewList[j].ToNodePlainString().GetDateRegex();
                    string InfoUrl = "http://www.zcjsglj.gov.cn" + aTag.Link;

                    string htlDtl = string.Empty;
                    try
                    {
                        htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htlDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "newscontent")));
                    if (dtl == null || dtl.Count < 1)
                    {
                        continue;
                    }

                    string HtmlTxt = dtl.AsHtml();

                    // The record kind is decided on the list title, before any name fallback.
                    bool isBidResult = prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果") || prjName.Contains("候选人公示");

                    // --- Fields extracted the same way for both record kinds ---
                    string ctx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                    // Four-character titles are replaced by the project name found in the content, if any.
                    if (prjName.Length == 4)
                    {
                        string tempName = ctx.GetRegex("工程名称,项目名称");
                        if (!string.IsNullOrEmpty(tempName))
                        {
                            prjName = tempName;
                        }
                    }

                    string code = ctx.GetCodeRegex().GetCodeDel();

                    // Cut the build unit at "招标代理" and after the first "公司".
                    string buildUnit = ctx.GetBuildRegex();
                    if (buildUnit.Contains("招标代理"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                    }
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }

                    string tenderType = prjName.GetInviteBidType();
                    string msgType = "广州市增城区住房和建设局";
                    string specType = "政府采购";
                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    List<KeyValuePair<string, string>> attachments = CollectAttachmentLinks(HtmlTxt);

                    if (isBidResult)
                    {
                        string bidUnit = ctx.GetBidRegex();
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = ctx.GetRegex("中标候选人为,中标候选公司,中标候选人");
                        }
                        string bidMoney = ctx.GetMoneyRegex();
                        string prjMgr = ctx.GetMgrRegex();

                        // Amounts above 100000 are divided by 10000; unparsable amounts are left untouched.
                        decimal amount;
                        if (decimal.TryParse(bidMoney, out amount) && amount > 100000)
                        {
                            bidMoney = (amount / 10000).ToString();
                        }

                        BidInfo info = ToolDb.GenBidInfo("广东省", "广州政府采购", "增城区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, ctx, string.Empty, msgType, tenderType, specType, otherType,
                                                         bidMoney, InfoUrl, prjMgr, HtmlTxt);
                        list.Add(info);

                        foreach (KeyValuePair<string, string> attachment in attachments)
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(attachment.Key, info.Id, attachment.Value));
                        }
                    }
                    else
                    {
                        string prjAddress = ctx.GetAddressRegex();

                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "增城区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, msgType, tenderType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);

                        foreach (KeyValuePair<string, string> attachment in attachments)
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(attachment.Key, info.Id, attachment.Value));
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }

        /// <summary>
        /// Finds every anchor in <paramref name="htmlTxt"/> that <c>IsAtagAttach</c> accepts and
        /// returns it as a (link text, URL) pair. Links that do not contain "http" are prefixed
        /// with <c>http://www.zcjsglj.gov.cn/</c>.
        /// </summary>
        private static List<KeyValuePair<string, string>> CollectAttachmentLinks(string htmlTxt)
        {
            List<KeyValuePair<string, string>> links = new List<KeyValuePair<string, string>>();

            Parser parser = new Parser(new Lexer(htmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode == null)
            {
                return links;
            }

            for (int k = 0; k < aNode.Count; k++)
            {
                ATag a = aNode[k].GetATag();
                if (a == null || !a.IsAtagAttach())
                {
                    continue;
                }

                string link = a.Link.ToLower().Contains("http")
                    ? a.Link
                    : "http://www.zcjsglj.gov.cn/" + a.Link;
                links.Add(new KeyValuePair<string, string>(a.LinkText, link));
            }
            return links;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Crawls the paged list at <c>SiteUrl + pageNumber</c>, loads each item's detail page and
        /// builds one <c>BidInfo</c> per page whose <c>body</c> element could be extracted. When the
        /// winning bidder cannot be found in the page text, the first bordered table (or, failing
        /// that, the first table) is read as label/value rows and searched instead. Attachment
        /// links are queued on <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The bid records collected so far (possibly empty).</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<BidInfo>();
            // Determine the page count.
            int pageInt = 1;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "1", Encoding.UTF8);
            }
            catch
            {
                return list;
            }

            Parser parser = new Parser(new Lexer(html));
            NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "paging"), new TagNameFilter("div")));

            if (sNode != null && sNode.Count > 0)
            {
                // The page count is the text between "/" and "转到" in the pager; keep 1 on any failure.
                string temp = sNode[0].ToNodePlainString();
                try
                {
                    temp = temp.GetRegexBegEnd("/", "转到");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + i, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasAttributeFilter("class", "column-info-list"), new TagNameFilter("div")), true), new TagNameFilter("li")));
                if (sNode == null || sNode.Count < 1)
                {
                    continue;
                }

                for (int t = 0; t < sNode.Count; t++)
                {
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                           code = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                           bidCtx = string.Empty, prjMgr = string.Empty,
                           otherType = string.Empty, HtmlTxt = string.Empty;

                    // List items without a link cannot be followed; skip them.
                    ATag aTag = sNode[t].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    prjName = aTag.LinkText.ToNodeString();
                    InfoUrl = "http://ggzy.zhaoqing.gov.cn" + aTag.Link;
                    beginDate = sNode[t].ToPlainTextString().GetDateRegex();

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    Parser dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    if (dtnode == null || dtnode.Count < 1)
                    {
                        continue;
                    }

                    HtmlTxt = dtnode.AsHtml();
                    bidCtx = HtmlTxt.ToCtxString();

                    bidUnit = bidCtx.GetBidRegex();
                    bidMoney = bidCtx.GetMoneyRegex();

                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        // Fallback: prefer a table with border="1", otherwise take any table.
                        dtlparser = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("border", "1")));
                        if (tableNode == null || tableNode.Count < 1)
                        {
                            dtlparser.Reset();
                            tableNode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        }
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            TableTag table = tableNode[0] as TableTag;
                            if (table != null && table.RowCount > 0 && table.Rows[0].ColumnCount >= 2)
                            {
                                // Flatten rows 1..n into "label:value" lines so the regex helpers can read them.
                                StringBuilder ctxBuilder = new StringBuilder();
                                for (int j = 1; j < table.RowCount; j++)
                                {
                                    TableRow row = table.Rows[j];
                                    if (row.ColumnCount < 2)
                                    {
                                        continue;   // a short row has no label/value pair to read
                                    }
                                    ctxBuilder.Append(row.Columns[0].ToNodePlainString()).Append(":");
                                    ctxBuilder.Append(row.Columns[1].ToNodePlainString()).Append("\r\n");
                                }
                                string ctx = ctxBuilder.ToString();

                                bidUnit = ctx.GetBidRegex();
                                if (string.IsNullOrWhiteSpace(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("单位名称,第一中标候选人");
                                }
                                bidMoney = ctx.GetMoneyRegex();
                                prjMgr = ctx.GetMgrRegex();
                            }
                        }
                    }

                    buildUnit = bidCtx.GetBuildRegex();
                    code = bidCtx.GetCodeRegex();

                    msgType = "肇庆市公共资源交易中心";
                    specType = "建设工程";

                    prjName = ToolDb.GetPrjName(prjName);
                    bidType = ToolHtml.GetInviteTypes(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "肇庆市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    dtlparser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int a = 0; a < aNode.Count; a++)
                        {
                            ATag fileTag = aNode[a] as ATag;
                            if (fileTag == null || !fileTag.IsAtagAttach())
                            {
                                continue;
                            }

                            string url;
                            if (fileTag.Link.Contains("http"))
                            {
                                url = fileTag.Link;
                            }
                            else
                            {
                                // Other links are placed under SiteUrl + the first six characters
                                // of the date with "-" removed (presumably yyyyMM -- confirm the
                                // format GetDateRegex returns). Without six characters no such
                                // folder can be built, so the attachment is skipped.
                                string dateDigits = beginDate.GetReplace("-");
                                if (string.IsNullOrEmpty(dateDigits) || dateDigits.Length < 6)
                                {
                                    continue;
                                }
                                url = this.SiteUrl + dateDigits.Substring(0, 6) + fileTag.Link.GetReplace("./", "/");
                            }

                            BaseAttach item = ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, url);
                            base.AttachList.Add(item);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Walks the paged listing behind <c>SiteUrl</c> (request values <c>classId</c>, <c>key</c>, <c>page</c>),
        /// opens every linked detail page and builds one <c>InviteInfo</c> for each page that contains a
        /// <c>div</c> with class <c>news_article</c>. Attachment links found in that element are appended
        /// to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far. A listing page that cannot be fetched ends the crawl and returns
        /// whatever was gathered up to that point.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <InviteInfo>();
            int    pageInt   = 295; // NOTE(review): hard-coded bound; the loop below visits pages 1..294 - confirm
            string html      = string.Empty;
            string cookiestr = string.Empty;

            for (int i = 1; i < pageInt; i++)
            {
                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "classId", "key", "page" }, new string[] { "151", "-1", i.ToString() });
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                }
                catch { return(list); }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("li"));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        ATag aTag = listNode[j].GetATag();
                        if (aTag == null)
                        {
                            // The filter above matches every <li> in the response, so items that carry
                            // no link must be skipped instead of dereferenced.
                            continue;
                        }

                        prjName   = aTag.GetAttribute("title").GetReplace("\\\"");
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();

                        InfoUrl = "http://bid.aited.cn/" + aTag.Link.GetReplace("../,\\\"");
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; } // a detail page that cannot be fetched only skips this entry
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news_article")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex().GetCodeDel();
                            msgType    = "中航技国际经贸发展有限公司";
                            specType   = "建设工程";
                            inviteType = prjName.GetInviteBidType();
                            InviteInfo info = ToolDb.GenInviteInfo("北京市", "北京市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);

                            // Collect attachment links from the detail body.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a == null)
                                    {
                                        // "as" yields null when the node is not an ATag.
                                        continue;
                                    }
                                    if (a.IsAtagAttach() || a.Link.Contains("DownloadServlet"))
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://bid.aited.cn/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Crawls the notice listing at <c>SiteUrl</c>, paging through it by re-posting the ASP.NET form
        /// fields (<c>__VIEWSTATE</c>, <c>__EVENTVALIDATION</c>, <c>AspNetPager1</c>). Each row of the
        /// <c>GridView1</c> table whose detail page contains a <c>table</c> with <c>width="96%"</c> becomes a
        /// <c>NotifyInfo</c> that is saved through <c>ToolDb.SaveEntity</c>; images and attachment links
        /// found in the detail body are saved as <c>BaseAttach</c> entities.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method stops once <c>MaxCount</c> records have been generated.
        /// </param>
        /// <returns>Always <c>null</c>; records are persisted directly instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt = 1, sqlCount = 0;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "AspNetPager1")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The href of the last pager link is parsed for the total page count.
                    string temp = pageNode[pageNode.Count - 1].GetATagHref().GetRegexBegEnd(",'", "'");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager cannot be parsed
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting back the form state of the previous response.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION",
                        "TBKey",
                        "AspNetPager1_input"
                    }, new string[] {
                        viewState,
                        "E997B95C",
                        "AspNetPager1",
                        i.ToString(),
                        eventValidation,
                        "",
                        (i - 1).ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    // Row 0 is skipped (presumably the header row - confirm against the page).
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string   headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            // A row without a link has nothing to crawl; skip it instead of dereferencing null.
                            continue;
                        }
                        headName    = aTag.LinkText;
                        infoType    = "通知公告";
                        releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        infoUrl     = "http://www.sdzb.gov.cn/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "96%")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            ctxHtml = dtlNode.AsHtml();
                            infoCtx = ctxHtml.ToCtxString();
                            msgType = "山东省建设工程招标投标管理办公室";
                            List <string> attach = new List <string>();
                            parser = new Parser(new Lexer(ctxHtml));
                            NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                            if (imgNode != null && imgNode.Count > 0)
                            {
                                for (int p = 0; p < imgNode.Count; p++)
                                {
                                    ImageTag img = imgNode[p] as ImageTag;
                                    if (img == null || string.IsNullOrEmpty(img.ImageURL))
                                    {
                                        continue;
                                    }
                                    // Only relative image URLs get the site prefix; prefixing an absolute
                                    // URL would produce an unusable link. This mirrors the anchor handling below.
                                    string link = img.ImageURL.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                                        ? img.ImageURL
                                        : "http://www.sdzb.gov.cn" + img.ImageURL.GetReplace("../,./");
                                    ctxHtml = ctxHtml.GetReplace(img.ImageURL, link);
                                    attach.Add(link);
                                }
                            }

                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "山东省", "山东省及地市", "", infoCtx, infoType);
                            sqlCount++;
                            // NOTE(review): the limit is checked before the save, so the record that reaches
                            // MaxCount is generated but never persisted - confirm this is intended.
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                if (attach.Count > 0)
                                {
                                    for (int a = 0; a < attach.Count; a++)
                                    {
                                        try
                                        {
                                            BaseAttach entity = ToolHtml.GetBaseAttachByUrl(attach[a], headName, info.Id);
                                            if (entity != null)
                                            {
                                                ToolDb.SaveEntity(entity, "SourceID,AttachServerPath");
                                            }
                                        }
                                        catch { } // best effort: one failing image must not stop the crawl
                                    }
                                }
                                parser = new Parser(new Lexer(ctxHtml));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k] as ATag;
                                        if (a == null)
                                        {
                                            // "as" yields null when the node is not an ATag.
                                            continue;
                                        }
                                        if (a.Link.ToLower().Contains("download") || a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.sdzb.gov.cn" + a.Link.GetReplace("../,./");
                                            }
                                            // Links longer than 500 bytes in the default encoding are skipped.
                                            if (Encoding.Default.GetByteCount(link) > 500)
                                            {
                                                continue;
                                            }
                                            try
                                            {
                                                BaseAttach entity = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                                                if (entity != null)
                                                {
                                                    ToolDb.SaveEntity(entity, "SourceID,AttachServerPath");
                                                }
                                            }
                                            catch { } // best effort: one failing attachment must not stop the crawl
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(null);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Crawls the tender listing that starts at <c>SiteUrl</c> and continues on
        /// <c>index_{n}.htm</c> pages. Every <c>li</c> under <c>ul.news</c> that carries a link is followed;
        /// when the detail page contains a <c>div</c> with class <c>m_r m_r_g</c> an <c>InviteInfo</c> is
        /// built from it and attachment links in that element are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The collected records; empty when the first listing page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results  = new List <InviteInfo>();
            string pageHtml = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(results); }

            // Read the total page count from the pager text; fall back to a single page.
            int      totalPages  = 1;
            Parser   pagerParser = new Parser(new Lexer(pageHtml));
            NodeList pagerNodes  = pagerParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "page")));
            if (pagerNodes != null && pagerNodes.Count > 0)
            {
                try
                {
                    totalPages = int.Parse(pagerNodes[0].ToPlainTextString().GetRegexBegEnd("共", "页"));
                }
                catch { }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                // Page 1 is already loaded; later pages live under index_{n}.htm.
                if (pageNo != 1)
                {
                    try
                    {
                        pageHtml = this.ToolWebSite.GetHtmlByUrl("http://ggzy.zhuhai.gov.cn//zbgg/index_" + pageNo + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }

                Parser   listParser = new Parser(new Lexer(pageHtml));
                NodeList entries    = listParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "news")), true), new TagNameFilter("li")));
                if (entries == null || entries.Count <= 0)
                {
                    continue;
                }

                for (int entryIdx = 0; entryIdx < entries.Count; entryIdx++)
                {
                    INode entry  = entries[entryIdx];
                    ATag  anchor = entry.GetATag();
                    if (anchor == null)
                    {
                        continue;
                    }

                    string prjName   = anchor.GetAttribute("title");
                    string beginDate = entry.ToPlainTextString().GetDateRegex();
                    string detailUrl = anchor.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    Parser   detailParser = new Parser(new Lexer(detailHtml));
                    NodeList detailNodes  = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "m_r m_r_g")));
                    if (detailNodes == null || detailNodes.Count <= 0)
                    {
                        continue;
                    }

                    string bodyHtml  = detailNodes.AsHtml();
                    string bodyText  = bodyHtml.ToCtxString();
                    string compact   = bodyText.GetReplace(" ");
                    string buildUnit = compact.GetBuildRegex();
                    string code      = compact.GetCodeRegex().GetCodeDel();
                    string address   = compact.GetAddressRegex();

                    // Trim trailing noise from the extracted owner name.
                    if (buildUnit.Contains("管理局"))
                    {
                        buildUnit = buildUnit.Substring(0, buildUnit.IndexOf("管理局")) + "管理局";
                    }
                    if (buildUnit.Contains("联系"))
                    {
                        buildUnit = buildUnit.Substring(0, buildUnit.IndexOf("联系"));
                    }
                    if (buildUnit.Contains("价格"))
                    {
                        buildUnit = buildUnit.Substring(0, buildUnit.IndexOf("价格"));
                    }

                    string     inviteType = prjName.GetInviteBidType();
                    InviteInfo record     = ToolDb.GenInviteInfo("广东省", "珠海市区", "", string.Empty, code, prjName, address, buildUnit, beginDate, string.Empty, bodyText, string.Empty, "珠海市公共资源交易中心", inviteType, "建设工程", string.Empty, detailUrl, bodyHtml);

                    // Queue every attachment link found in the detail body.
                    Parser   linkParser = new Parser(new Lexer(bodyHtml));
                    NodeList links      = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (links != null && links.Count > 0)
                    {
                        for (int linkIdx = 0; linkIdx < links.Count; linkIdx++)
                        {
                            ATag fileLink = links[linkIdx] as ATag;
                            if (!fileLink.IsAtagAttach())
                            {
                                continue;
                            }
                            string target = fileLink.Link.ToLower().Contains("http")
                                ? fileLink.Link
                                : "http://ggzy.zhuhai.gov.cn/" + fileLink.Link;
                            base.AttachList.Add(ToolDb.GenBaseAttach(fileLink.LinkText, record.Id, target));
                        }
                    }

                    results.Add(record);
                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return(results);
                    }
                }
            }
            return(results);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Crawls the notice listing at <c>SiteUrl</c> (further pages via <c>?Paging=n</c>). Each row of the
        /// first <c>table</c> with <c>width="99%"</c> that carries a link is followed; when the detail page
        /// contains a <c>td</c> with id <c>TDContent</c> a <c>NoticeInfo</c> is built from it and attachment
        /// links in that cell are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The collected records; empty when the first listing page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <NoticeInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "总页数" and "当前页" with colons stripped.
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager cannot be parsed
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?Paging=" + i, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "99%")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    // The last row is excluded (presumably a pager/footer row - confirm against the page).
                    for (int j = 0; j < table.RowCount - 1; j++)
                    {
                        string InfoTitle = string.Empty, InfoType = string.Empty, PublistTime = string.Empty, InfoCtx = string.Empty, InfoUrl = string.Empty, prjCode = string.Empty, buildUnit = string.Empty, htmlTxt = string.Empty, area = string.Empty;

                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            // A row without a link has nothing to crawl; skip it instead of dereferencing null.
                            continue;
                        }
                        InfoTitle   = aTag.GetAttribute("title");
                        PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl     = "http://www.gxzbtb.cn" + aTag.Link;
                        InfoType    = "澄清变更";

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            htmlTxt   = dtlNode.AsHtml().GetJsString();
                            InfoCtx   = htmlTxt.ToCtxString();
                            buildUnit = InfoCtx.GetBuildRegex();

                            NoticeInfo info = ToolDb.GenNoticeInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "广西壮族自治区公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "交通工程", string.Empty, htmlTxt);
                            parser = new Parser(new Lexer(htmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k].GetATag();
                                    if (a == null)
                                    {
                                        // GetATag is null-checked elsewhere in this code base, so guard before use.
                                        continue;
                                    }
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.gxzbtb.cn" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }

                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Exemplo n.º 17
0
        /// <summary>
        /// Requests up to <c>MaxCount</c> rows from the JSON listing endpoint, then for every row fetches
        /// the announcement JSON (<c>showGongGao.do</c>), takes its <c>html</c> member as the detail body and
        /// builds an <c>InviteInfo</c>. Attachment links found in the body are appended to
        /// <c>base.AttachList</c>. Rows whose title contains "测试" are skipped.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records, or <c>null</c> when the listing cannot be fetched or the response
        /// contains no <c>{...}</c> payload.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string urlList = "http://www.ezztb.gov.cn/jiaoyixinxi/queryJiaoYiXinXiPagination.do?bianHao=&gongChengLeiBie=&gongChengType=&gongShiType=10&page=1&title=&type=10&rows=";
            IList  list    = new List <InviteInfo>();
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(urlList + this.MaxCount);
            }
            catch { return(null); }
            if (string.IsNullOrEmpty(html))
            {
                return(null);
            }

            // Cut the response down to the outermost JSON object before deserializing it.
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");
            if (startIndex < 0 || endIndex < startIndex)
            {
                // No JSON object in the response: same outcome as a failed fetch, instead of letting
                // Substring throw.
                return(null);
            }

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            object rowsValue;
            if (smsTypeJson == null || !smsTypeJson.TryGetValue("rows", out rowsValue))
            {
                return(list);
            }
            object[] objvalues = rowsValue as object[];
            if (objvalues == null)
            {
                return(list);
            }
            foreach (object objValue in objvalues)
            {
                Dictionary <string, object> dic = (Dictionary <string, object>)objValue;
                string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                       prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                       specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                       remark = string.Empty, InfoUrl = string.Empty,
                       msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                code       = Convert.ToString(dic["bianHao"]);
                prjName    = Convert.ToString(dic["title"]);
                beginDate  = Convert.ToString(dic["faBuStartTimeText"]).GetDateRegex();
                inviteType = Convert.ToString(dic["gongChengTypeText"]);

                if (prjName.Contains("测试"))
                {
                    continue;
                }
                InfoUrl = "http://www.ezztb.gov.cn/jyw/jyw/showGongGao.do?ggGuid=" + dic["yuanXiTongId"];

                try
                {
                    HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                    JavaScriptSerializer        Newserializer = new JavaScriptSerializer();
                    Dictionary <string, object> newTypeJson   = (Dictionary <string, object>)Newserializer.DeserializeObject(HtmlTxt);
                    HtmlTxt = Convert.ToString(newTypeJson["html"]);
                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        // NOTE(review): the fallback page is fetched but its content is never used, so
                        // HtmlTxt stays empty here; a failure of this request still skips the row.
                        string url     = "http://www.ezztb.gov.cn/jiaoyixingxi/zbgg_view.html?guid=" + dic["yuanXiTongId"];
                        string htmldtl = this.ToolWebSite.GetHtmlByUrl(url);
                    }
                }
                catch (Exception) { continue; } // a detail that cannot be fetched or parsed only skips this row
                inviteCtx = HtmlTxt.Replace("</span>", "\r\n").ToCtxString();

                prjAddress = inviteCtx.GetAddressRegex();
                buildUnit  = inviteCtx.GetBuildRegex();
                if (string.IsNullOrEmpty(code))
                {
                    code = inviteCtx.GetCodeRegex();
                }
                msgType  = "鄂州市公共资源交易中心";
                specType = "建设工程";
                if (string.IsNullOrWhiteSpace(inviteType))
                {
                    inviteType = prjName.GetInviteBidType();
                }
                buildUnit = buildUnit.Replace(" ", "");
                InviteInfo info = ToolDb.GenInviteInfo("湖北省", "湖北省及地市", "鄂州市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);

                // Collect attachments BEFORE the MaxCount check so the last accepted record keeps its
                // attachments.
                Parser   parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k] as ATag;
                        if (a == null)
                        {
                            // "as" yields null when the node is not an ATag.
                            continue;
                        }
                        if (a.IsAtagAttach())
                        {
                            string link = string.Empty;
                            if (a.Link.ToLower().Contains("http"))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.ezztb.gov.cn/" + a.Link;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                }
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return(list);
                }
            }
            return(list);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Crawls the paged tender listing at <c>SiteUrl</c> (an ASP.NET grid paged through postbacks),
        /// builds one <c>InviteInfo</c> per listed project from its detail page, and registers any
        /// downloadable documents found there in <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected;
        /// when <c>true</c>, every page reported by the pager is visited.
        /// </param>
        /// <returns>
        /// The collected records; the list is empty when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
                // Strip inline scripts before handing the markup to the parser.
                Regex regexHtml = new Regex(@"<script[^<]*</script>");
                htl = regexHtml.Replace(htl, "");
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("colSpan", "6")));

            if (nodeList != null && nodeList.Count > 0)
            {
                // The pager cell contains text of the form "共N页"; keep the default of one page
                // when the pattern is missing instead of throwing from int.Parse.
                Regex  regexPage = new Regex(@"共\d+页");
                string pageText  = regexPage.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' });
                int    parsedPage;
                if (int.TryParse(pageText, out parsedPage))
                {
                    page = parsedPage;
                }
            }
            // Inclusive upper bound: the previous "i < page" skipped the last page and processed
            // nothing at all when the listing had a single page.
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the grid's pager form back,
                    // carrying the view state / event validation of the page currently held in htl.
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "key",
                        "AxGridView1$ctl23$ctl07",
                        "AxGridView1$ctl23$pageList",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        "AxGridView1$ctl23$ctl03",
                        string.Empty,
                        viewState,
                        string.Empty,
                        "20",
                        (i - 1).ToString(),
                        string.Empty,
                        eventValidation
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch (Exception) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "AxGridView1")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = (TableTag)tableNodeList[0];
                    // The first and last rows of the grid are skipped (presumably header and pager rows).
                    for (int j = 1; j < table.RowCount - 1; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j];
                        code    = tr.Columns[2].ToPlainTextString().Trim();
                        prjName = tr.Columns[3].ToPlainTextString().Trim();
                        // endDate is not read from the listing and stays empty.
                        ATag aTag = tr.Columns[5].SearchFor(typeof(ATag), true)[0] as ATag;
                        InfoUrl = "http://www.yjgcjy.cn/" + aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            Logger.Error("InviteYJYXJS");
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                        if (dtnode != null && dtnode.Count > 0)
                        {
                            // Layout 1: the notice is a table; flatten it into one text line per table row.
                            HtmlTxt = dtnode.AsHtml();
                            TableTag tableRow = (TableTag)dtnode[0];
                            for (int k = 1; k < tableRow.RowCount; k++)
                            {
                                TableRow trow = tableRow.Rows[k];
                                for (int c = 0; c < trow.ColumnCount; c++)
                                {
                                    inviteCtx += trow.Columns[c].ToPlainTextString().Trim();
                                }
                                inviteCtx += "\r\n";
                            }
                            Regex regPrjAddr = new Regex(@"工程建设地址:[^\r\n]+\r\n");
                            try
                            {
                                prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程建设地址", "").Replace(":", "").Replace("。", "").Replace("、", "").Replace(";", "").Replace(",", "").Trim();
                                // Fall back to a placeholder when the address is missing or longer than 200 bytes.
                                if (Encoding.Default.GetByteCount(prjAddress) > 200 || prjAddress == "")
                                {
                                    prjAddress = "见招标详细信息";
                                }
                            }
                            catch (Exception)
                            {
                                prjAddress = "见招标详细信息";
                            }
                            Regex regBegin = new Regex(@"公告发布时间:[^\r\n]+[\r\n]{1}");
                            beginDate = regBegin.Match(inviteCtx).Value.Replace("公告发布时间:", "").Trim();
                            string date    = beginDate.Replace(" ", "").Trim();
                            Regex  regDate = new Regex(@"\d{4}年\d{1,2}月\d{1,2}日");
                            beginDate = regDate.Match(date).Value.Trim();
                            if (beginDate == "")
                            {
                                // NOTE(review): the character class lacks the backslashes of \u4e00-\u9fa5, so it
                                // matches literal ASCII ranges rather than CJK ideographs. Left unchanged because
                                // correcting it would alter which dates are extracted - confirm the intent.
                                Regex regDateT = new Regex(@"[u4e00-u9fa5]{4}年[u4e00-u9fa5]{1,2}月[u4e00-u9fa5]{1,2}日");
                                beginDate = regDateT.Match(inviteCtx).Value.Replace("公告发布时间:", "").Trim();
                            }
                            Regex bildUnit = new Regex(@"建设单位:[^\r\n]+[\r\n]{1}");
                            buildUnit = bildUnit.Match(inviteCtx).Value.Replace("建设单位:", "").Trim();
                            msgType    = "阳江市建设工程交易中心";
                            specType   = "建设工程";
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            // Remove leftover Office XML namespace fragments and the "leave a message" link text.
                            inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                            inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = ns0 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx  = inviteCtx.Replace("xml:namespace prefix = st1", "").Trim();
                            inviteCtx  = inviteCtx.Replace("点击进入留言", "").Trim();
                            code       = code.Replace(";", "").Replace(":", "").Trim();
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "阳江市区", "阳西县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parserdetail.Reset();
                            NodeList fileNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                            if (fileNode != null && fileNode.Count > 0 && fileNode[0] is TableTag)
                            {
                                TableTag fileTable = fileNode[0] as TableTag;
                                // Download rows are only looked for from row index 10 onwards.
                                for (int f = 10; f < fileTable.RowCount; f++)
                                {
                                    TableRow trowFile = fileTable.Rows[f];
                                    string   label    = trowFile.Columns[0].ToPlainTextString().Trim();
                                    bool isDownloadRow = label.Contains("下载招标文件:") || label.Contains("下载工程量清单:") || label.Contains("下载图纸:");
                                    if (!isDownloadRow)
                                    {
                                        continue;
                                    }
                                    if (trowFile.Columns[1].ToPlainTextString().Trim() == "")
                                    {
                                        continue;
                                    }
                                    // Every anchor in the second cell is a candidate attachment.
                                    NodeList fileLinks = trowFile.Columns[1].SearchFor(typeof(ATag), true);
                                    for (int ii = 0; ii < fileLinks.Count; ii++)
                                    {
                                        string st3       = fileLinks[ii].ToPlainTextString().Trim();
                                        ATag   aTagCh    = fileLinks[ii] as ATag;
                                        string urlValues = "http://www.yjgcjy.cn" + aTagCh.Link;
                                        if (aTagCh.Link.Contains("http://www.yjgcjy.cn"))
                                        {
                                            // Link is already absolute.
                                            urlValues = aTagCh.Link;
                                        }
                                        if (st3 != "")
                                        {
                                            BaseAttach attach = ToolDb.GenBaseAttach(st3, info.Id, urlValues);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                        else
                        {
                            // Layout 2: the notice body lives in <span id="nr">; no project code is recorded.
                            code = "";
                            Parser   parserdetailtwo = new Parser(new Lexer(htmldetail));
                            NodeList dtnodetwo       = parserdetailtwo.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "nr")));
                            if (dtnodetwo != null && dtnodetwo.Count > 0)
                            {
                                HtmlTxt   = dtnodetwo.AsHtml();
                                inviteCtx = dtnodetwo.AsString().Replace("。", "").Trim();
                                Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                                inviteCtx = regexHtml.Replace(inviteCtx, "").Replace("O", "〇");
                                Regex regPrjAddr = new Regex(@"(工程建设地点|工程地点):[^\r\n]+\r\n");
                                prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程建设地点", "").Replace("工程地点", "").Replace(":", "").Trim();
                                if (prjAddress == "")
                                {
                                    prjAddress = "见招标详细信息";
                                }
                                // NOTE(review): same missing-backslash character class as in the table layout; as
                                // written it is a negated ASCII range. Left unchanged to keep current matching.
                                Regex regDateT = new Regex(@"[^u4e00-u9fa5]{4}年[^u4e00-u9fa5]{1,3}月[^u4e00-u9fa5]{1,3}日");
                                beginDate = regDateT.Match(inviteCtx).Value.Trim();
                                beginDate = returnS(beginDate);
                                Regex bildUnit = new Regex(@"发包人:[^\r\n]+[\r\n]{1}");
                                buildUnit = bildUnit.Match(inviteCtx).Value.Replace("发包人:", "").Trim();
                                msgType    = "阳江市建设工程交易中心";
                                specType   = "建设工程";
                                inviteType = ToolHtml.GetInviteTypes(prjName);
                                inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                                inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = ns0 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                                inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                                inviteCtx  = inviteCtx.Replace("xml:namespace prefix = st1", "").Trim();
                                inviteCtx  = inviteCtx.Replace("点击进入留言", "").Trim();
                                inviteCtx  = inviteCtx.Replace("〇", "0");
                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "阳江市区", "阳西县", string.Empty, code, prjName,
                                                                       prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType,
                                                                       inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            // Return what was collected; the previous "return(null)" discarded every record
            // whenever the MaxCount early exit was not taken.
            return(list);
        }
Exemplo n.º 19
0
        /// <summary>
        /// Crawls the paged project listing under <c>SiteUrl</c>, builds one <c>ItemPlan</c> per listed
        /// project from its detail page, and registers attachment links found in the detail content
        /// in <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected;
        /// when <c>true</c>, every page reported by the pager is visited.
        /// </param>
        /// <returns>
        /// The collected records, or <c>null</c> when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "-1");
            }
            catch { return(null); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "m_COUNT")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "/" and ")" in the pager cell.
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("/", ")");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager leaves pageInt at 1, so only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Later pages are addressed by a numeric suffix that advances by 24 per page.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + ((i - 1) * 24));
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "m_TAB")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                               BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                               PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                               ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                               MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                               PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                               ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                               MsgType = string.Empty;

                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            // Rows without a link in the second cell are not project entries.
                            continue;
                        }
                        ItemName = tr.Columns[1].ToNodePlainString();
                        if (ItemName.Contains("..."))
                        {
                            // The visible text is truncated; take the name from the link's title attribute.
                            // (The attribute value was previously read and thrown away.)
                            ItemName = aTag.GetAttribute("title");
                        }
                        PlanDate = "20" + tr.Columns[2].ToPlainTextString().GetDateRegex("yy-MM-dd");

                        InfoUrl = "http://www.scdrc.gov.cn" + aTag.Link;

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch
                        {
                            continue;
                        }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList IsNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("iframe"), new HasAttributeFilter("id", "m_FRAME")));
                        if (IsNode != null && IsNode.Count > 0)
                        {
                            // The detail page only wraps an iframe; fetch the "_1.htm" variant of the link instead.
                            try
                            {
                                InfoUrl = "http://www.scdrc.gov.cn" + aTag.Link.GetReplace(".htm", "_1.htm");
                                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                            }
                            catch { continue; }
                        }

                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "m_TEXT")));
                        if (dtlNode == null || dtlNode.Count < 1)
                        {
                            // No dedicated content cell: fall back to the whole body.
                            parser.Reset();
                            dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                        }
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            CtxHtml = dtlNode.AsHtml();
                            parser  = new Parser(new Lexer(CtxHtml));
                            NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                // Flatten the first table: odd-positioned cells are followed by ":",
                                // even-positioned cells end the line.
                                TableTag tag = tableNode[0] as TableTag;
                                for (int r = 0; r < tag.RowCount; r++)
                                {
                                    for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = tag.Rows[r].Columns[c].ToNodePlainString();
                                        if ((c + 1) % 2 == 0)
                                        {
                                            ItemCtx += temp.GetReplace(":,:") + "\r\n";
                                        }
                                        else
                                        {
                                            ItemCtx += temp.GetReplace(":,:") + ":";
                                        }
                                    }
                                }
                            }
                            else
                            {
                                ItemCtx = CtxHtml.ToCtxString();
                            }
                            ItemContent   = ItemCtx.GetRegex("内容", true, 1000);
                            ApprovalUnit  = ItemCtx.GetRegex("批复单位");
                            ApprovalDate  = ItemCtx.GetRegex("批复日期,批复时间");
                            ApprovalCode  = ItemCtx.GetRegex("批复文号(备案号)");
                            TotalInvest   = ItemCtx.GetRegex("总投资").GetMoney();
                            PlanBeginDate = ItemCtx.GetRegex("开工时间");
                            ItemAddress   = ItemCtx.GetRegex("所属地区");
                            PlanType      = ItemCtx.GetRegex("项目类型");
                            MsgType       = "四川省发展和改革委员会";
                            ItemName      = ItemName.GetReplace("四川省发展和改革委员会");
                            ItemPlan info = ToolDb.GenItemPlan("四川省", "四川省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                            parser = new Parser(new Lexer(CtxHtml));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            // Relative attachment links are resolved against the site's dir1111 folder.
                                            link = "http://www.scdrc.gov.cn/dir1111/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }

            return(list);
        }
Exemplo n.º 20
0
        /// <summary>
        /// Crawls the paged procurement listing at <c>SiteUrl</c>, builds one <c>InviteInfo</c> per listed
        /// notice from its document view page, and registers attachment/download links found in the
        /// notice body in <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected;
        /// when <c>true</c>, every page reported by the pager is visited.
        /// </param>
        /// <returns>
        /// The collected records; the list is empty when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "clearfix")), true), new TagNameFilter("a")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The last pager link's onclick carries the page count as its first argument:
                    // mark "(" and "," with sentinels and read the text between them.
                    string temp = sNode[sNode.Count - 1].GetATag().GetAttribute("onclick").Replace("(", "kdxx").Replace(",", "xxdk");
                    pageInt = int.Parse(temp.GetRegexBegEnd("kdxx", "xxdk"));
                }
                catch { pageInt = 1; }
            }

            // Extracts the "id=..." query fragment (up to the first '-') from a listing link;
            // loop-invariant, so it is built once rather than per row.
            Regex regexLink = new Regex(@"id=[^-]+");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://ps.szzfcg.cn/portal/topicView.do?method=view1&id=2887106&siteId=9&underwayFlag=undefined&tstmp=17%3A40%3A43%20GMT%2B0800&page=" + i, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "fixed")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount < 2)
                        {
                            // Not a data row (title and date cells are both required).
                            continue;
                        }
                        ATag aTag = tr.Columns[0].GetATag();
                        if (aTag == null)
                        {
                            // Rows without a link (e.g. headers) used to abort the whole crawl
                            // with a NullReferenceException; skip them instead.
                            continue;
                        }
                        beginDate = tr.Columns[1].ToNodePlainString().GetDateRegex("yyyy/MM/dd");
                        prjName   = aTag.GetAttribute("title");
                        string id = regexLink.Match(aTag.Link).Value;
                        InfoUrl = "http://ps.szzfcg.cn/portal/documentView.do?method=view&" + id;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            prjAddress = inviteCtx.GetAddressRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();
                            msgType    = "深圳市坪山新区公共资源交易中心";
                            specType   = "政府采购";
                            inviteType = "服务";
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳政府采购", "坪山新区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);

                            list.Add(info);

                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aTagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aTagNode != null && aTagNode.Count > 0)
                            {
                                for (int k = 0; k < aTagNode.Count; k++)
                                {
                                    ATag aFile = aTagNode[k].GetATag();
                                    if (aFile == null)
                                    {
                                        continue;
                                    }
                                    // Treat a link as an attachment when the helper says so or its URL mentions "down".
                                    if (aFile.IsAtagAttach() || aFile.Link.ToLower().Contains("down"))
                                    {
                                        string link = string.Empty;
                                        if (aFile.Link.Contains("http"))
                                        {
                                            link = aFile.Link;
                                        }
                                        else
                                        {
                                            link = "http://ps.szzfcg.cn/" + aFile.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, downloads every linked detail page and
        /// builds one <c>InviteInfo</c> for each notice whose detail page contains the
        /// <c>TDContent</c> cell. Attachment links found inside that cell are added to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <InviteInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string viewState = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: when the page total cannot be read only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the pager event back to the same URL.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    }, new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                           otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

                    TableRow tr = table.Rows[j];

                    // Rows that lack the title/date cells or the title link (for example header or
                    // layout rows) are skipped instead of aborting the whole crawl with an exception.
                    if (tr.Columns.Length < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    prjName   = aTag.GetAttribute("title");
                    beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://www.hbggzy.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt    = dtlNode.AsHtml();
                    inviteCtx  = HtmlTxt.ToCtxString();
                    prjAddress = inviteCtx.GetAddressRegex();
                    buildUnit  = inviteCtx.GetBuildRegex();

                    // Cut the extracted unit name right after the first "公司".
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    // NOTE(review): this keeps the "地址" label at the end of the unit name, which
                    // looks unintended - confirm before changing.
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址")) + "地址";
                    }

                    code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    msgType    = "湖北省公共资源交易中心";
                    specType   = "政府采购";
                    inviteType = "水利工程";
                    buildUnit  = buildUnit.Replace(" ", "");

                    InviteInfo info = ToolDb.GenInviteInfo("湖北省", "湖北省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Register every attachment link contained in the notice body.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = string.Empty;
                            if (a.Link.ToLower().Contains("http"))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.hbggzy.cn/" + a.Link;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Exemplo n.º 22
0
        /// <summary>
        /// Posts the home-page form of www.szjsjy.com.cn to obtain a session cookie, then walks the
        /// paged grid at <c>SiteUrl</c>. For every data row the detail page is downloaded, a
        /// <c>BidSituation</c> is built and saved through <c>ToolDb.SaveEntity</c>; when that call
        /// returns true, the attachment links inside the detail content are downloaded and saved too.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns once <c>MaxCount</c> detail pages have been processed.
        /// </param>
        /// <returns>
        /// The <c>list</c> instance created at the start. Records are persisted directly and are
        /// never added to it, so it is always empty.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <BidSituation>();
            string html = string.Empty;
            string cookiestr = string.Empty;
            string viewState = string.Empty;
            int    pageInt = 1, sqlCount = 0;
            string eventValidation = string.Empty;

            try
            {
                html            = this.ToolWebSite.GetHtmlByUrl("http://www.szjsjy.com.cn/HomePage.aspx", Encoding.UTF8, ref cookiestr);
                viewState       = this.ToolWebSite.GetAspNetViewState(html);
                eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                NameValueCollection n = this.ToolWebSite.GetNameValueCollection(
                    new string[] {
                    "__VIEWSTATE",
                    "__VIEWSTATEENCRYPTED",
                    "__EVENTVALIDATION",
                    "TextBox1",
                    "ddl",
                    "DDL_Govt",
                    "DDL_Trade",
                    "txtText",
                    "hdnSN",
                    "ImageButton2.x",
                    "ImageButton2.y"
                },
                    new string[] {
                    viewState,
                    "",
                    eventValidation,
                    "请输入关键字", "0", "0", "0",
                    "CN=年度施工投标人7,OU=1007,L=深圳市,ST=广东省,C=CN",
                    "241EDFC1BA276AA7", "19", "13"
                }
                    );
                string tempCookie = string.Empty;
                html = this.ToolWebSite.GetHtmlByUrl("http://www.szjsjy.com.cn/HomePage.aspx", n
                                                     , Encoding.UTF8, ref tempCookie);
                // Strip the "path=/;" and "HttpOnly" attributes and all spaces from the cookie
                // string returned by the form post before sending it with later requests.
                cookiestr = tempCookie.Replace("path=/;", "").Replace("HttpOnly,", "").Replace("HttpOnly", "").Replace(" ", "");
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));

            if (pageNode != null && pageNode.Count > 0)
            {
                TableTag pagerTable = pageNode[0] as TableTag;
                try
                {
                    // The page total is read from the text of the grid's last row.
                    string temp = pagerTable.Rows[pagerTable.RowCount - 1].ToNodePlainString().GetRegexBegEnd(",共", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: when the page total cannot be read only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "ctl00$Content$drpSearchType",
                        "ctl00$Content$txtQymc",
                        "ctl00$Content$hdnOperate",
                        "ctl00$hdnPageCount"
                    },
                                                                                      new string[] {
                        "ctl00$Content$GridView1",
                        "Page$" + i,
                        viewState,
                        "",
                        eventValidation,
                        "0", "", "", pageInt.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row of the grid are skipped.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    string code = string.Empty, prjName = string.Empty, PublicityEndDate = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty, beginDate = string.Empty;

                    TableRow tr = table.Rows[j];
                    code             = tr.Columns[1].ToNodePlainString();
                    prjName          = tr.Columns[2].ToNodePlainString();
                    PublicityEndDate = tr.Columns[3].ToPlainTextString();
                    beginDate        = DateTime.Now.ToString();
                    InfoUrl          = "http://www.szjsjy.com.cn/BusinessInfo/" + tr.Columns[4].GetATagHref();

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8, ref cookiestr).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ContentContainer")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    ctx     = HtmlTxt.ToCtxString();
                    msgType = "深圳市建设工程交易中心";
                    BidSituation info = ToolDb.GetBidSituation("广东省", "深圳市工程", "", code, prjName, PublicityEndDate, msgType, InfoUrl, ctx, HtmlTxt, beginDate);

                    // NOTE(review): the limit is checked before the save, so the record that
                    // reaches MaxCount is not persisted - confirm this is intended.
                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return list;
                    }

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int d = 0; d < aNode.Count; d++)
                    {
                        // Fixed: the loop used to read aNode[0] on every pass, so only the first
                        // anchor was ever examined and any further attachments were never saved.
                        ATag aTag = aNode[d] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }

                        string     url    = "http://www.szjsjy.com.cn/" + aTag.Link.Replace("../", "");
                        BaseAttach attach = null;
                        try
                        {
                            attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                            if (attach == null)
                            {
                                // One retry when the first attempt yields nothing.
                                attach = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                            }
                        }
                        catch
                        {
                            // A failed attachment download must not stop the crawl.
                        }
                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, string.Empty);
                        }
                    }
                }
            }
            return list;
        }
Exemplo n.º 23
0
        /// <summary>
        /// Parses one list page of tender notices, downloads the detail page of every data row and
        /// appends one <c>InviteInfo</c> per row to <paramref name="list"/>. Links on the detail
        /// page that point at the site's file-download URL are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="list">Receives the generated records.</param>
        /// <param name="html">
        /// HTML of the list page; it is searched for a table with class <c>lefttable</c>.
        /// </param>
        /// <param name="crawlAll">
        /// When <c>false</c>, processing stops once <paramref name="list"/> holds <c>MaxCount</c> entries.
        /// </param>
        public void DealHtml(IList list, string html, bool crawlAll)
        {
            // The patterns do not depend on the row, so they are built once.
            Regex regexHtml      = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regex          = new Regex(@"时间:\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}");
            Regex regexcode      = new Regex("(工程编号|项目编号|招标编号):[^\r\n]+[\r\n]{1}");
            Regex regexBuildUnit = new Regex("(招标人|建设单位|招标采购单位):[^\r\n]+[\r\n]{1}");
            Regex regexAddress   = new Regex("(建设地点|项目地点|工程地点):[^\r\n]+[\r\n]{1}");
            Regex regexCtx       = new Regex("<!--[^<]+-->");
            Regex regDate        = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");

            Parser   parserDtl = new Parser(new Lexer(html));
            NodeList aNodes    = parserDtl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));

            if (aNodes == null || aNodes.Count == 0)
            {
                return;
            }
            TableTag table = aNodes[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // The first and the last row of the list table are skipped.
            for (int t = 1; t < table.RowCount - 1; t++)
            {
                string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty,
                       inviteType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty,
                       InfoUrl = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty;

                TableRow tr = table.Rows[t] as TableRow;

                // A row without a link has no detail page; skip it instead of failing on a null anchor.
                NodeList rowLinks = tr.SearchFor(typeof(ATag), true);
                if (rowLinks == null || rowLinks.Count == 0)
                {
                    continue;
                }
                ATag aTag = rowLinks[0] as ATag;
                if (aTag == null)
                {
                    continue;
                }

                InfoUrl = aTag.Link;
                prjName = tr.Columns[1].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                endDate = tr.Columns[2].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                string htmlDtl = string.Empty;
                try
                {
                    htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                }
                catch (Exception)
                {
                    continue;
                }

                // Drop script blocks and XML declarations before parsing the detail page.
                htmlDtl = regexHtml.Replace(htmlDtl, "");
                Parser parserCtx = new Parser(new Lexer(htmlDtl));

                NodeList ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "printTb lefttable")));
                if (ctxNode != null && ctxNode.Count > 0)
                {
                    Parser   parserdiv = new Parser(new Lexer(htmlDtl));
                    NodeList aNodesdiv = parserdiv.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "biuuu_button")));

                    // string.Replace throws ArgumentException for an empty search string, so the
                    // button markup is only removed when the page actually contains it.
                    string buttonHtml = aNodesdiv == null ? string.Empty : aNodesdiv.AsHtml();
                    string tableHtml  = ctxNode.AsHtml();
                    if (!string.IsNullOrEmpty(buttonHtml))
                    {
                        tableHtml = tableHtml.Replace(buttonHtml, "");
                    }
                    HtmlTxt = tableHtml.Trim();

                    TableTag tabTag    = ctxNode[0] as TableTag;
                    string   startTime = tabTag.Rows[1].Columns[0].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                    Match    math      = regex.Match(startTime);
                    // This timestamp is only used for the begin/end sanity check below; beginDate is
                    // overwritten with the date of the "toptd_bai" cell afterwards.
                    beginDate = math.Value.Replace("时间:", "").Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                    string tableText = tabTag.ToPlainTextString();

                    Match match = regexcode.Match(tableText);
                    code = match.Value.Substring(match.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                    Match matchBuildUnit = regexBuildUnit.Match(tableText);
                    buildUnit = matchBuildUnit.Value.Substring(matchBuildUnit.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                    Match matchAddress = regexAddress.Match(tableText);
                    prjAddress = matchAddress.Value.Substring(matchAddress.Value.IndexOf(":") + 1).Trim();

                    ctx = tabTag.Rows[2].Columns[0].ToPlainTextString().Replace("&nbsp;", " ").Replace("\r\n\r\n\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                    if (ctx.Length > 0)
                    {
                        ctx = regexCtx.Replace(ctx, "");
                    }

                    if (Encoding.Default.GetByteCount(code) > 50)
                    {
                        code = "";
                    }
                    if (Encoding.Default.GetByteCount(buildUnit) > 150)
                    {
                        // The guard counts bytes while Substring counts characters: a multi-byte
                        // name can exceed 150 bytes with fewer than 150 characters, which used to
                        // throw ArgumentOutOfRangeException. Clamp the length to the string.
                        buildUnit = buildUnit.Substring(0, Math.Min(150, buildUnit.Length));
                    }
                    if (Encoding.Default.GetByteCount(prjAddress) > 200)
                    {
                        prjAddress = "见招标公告内容";
                    }

                    if (beginDate.Length > 0 && endDate.Length > 0)
                    {
                        // Same outcome as the former Parse-in-try: an unparsable begin leaves both
                        // values at their default, an unparsable end leaves only end at its default.
                        DateTime begin;
                        DateTime end = new DateTime();
                        if (DateTime.TryParse(beginDate, out begin))
                        {
                            DateTime.TryParse(endDate, out end);
                        }
                        if (begin > end)
                        {
                            endDate = string.Empty;
                        }
                    }
                }

                parserCtx.Reset();

                ctxNode   = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "toptd_bai")));
                beginDate = regDate.Match(ctxNode.AsString()).Value.Trim();

                inviteType = ToolHtml.GetInviteTypes(prjName);
                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, "惠州市建设工程交易中心", inviteType, "建设工程", string.Empty, InfoUrl, HtmlTxt);
                list.Add(info);

                // Rewind again, exactly as before the previous extraction; without it the parser
                // would continue from where that extraction stopped.
                parserCtx.Reset();
                ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("target", "_blank")));
                NodeList aTagNodes = ctxNode.SearchFor(typeof(ATag), true);
                for (int a = 0; a < aTagNodes.Count; a++)
                {
                    ATag fileTage = aTagNodes[a] as ATag;
                    if (fileTage != null && fileTage.Link.Contains("http://www.ebc.huizhou.gov.cn/index/loadNewsFile"))
                    {
                        string     downloadURL = fileTage.Link;
                        BaseAttach attach      = ToolDb.GenBaseAttach(fileTage.ToPlainTextString(), info.Id, downloadURL);
                        base.AttachList.Add(attach);
                    }
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return;
                }
            }
        }
Exemplo n.º 24
0
        /// <summary>
        /// Walks the paged article list at <c>SiteUrl</c> (later pages are fetched from
        /// <c>SiteUrl + "/p/" + page + ".html"</c>), downloads each linked article and creates an
        /// <c>ItemPlan</c> for every article whose page contains the <c>content</c> div.
        /// Attachment links inside that div are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  records   = new List <ItemPlan>();
            string listHtml  = string.Empty;
            int    pageTotal = 1;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                return records;
            }

            // Read the page total from the pager; on any failure only one page is crawled.
            Parser   pagerParser = new Parser(new Lexer(listHtml));
            NodeList pagerNodes  = pagerParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "page")));
            if (pagerNodes != null && pagerNodes.Count > 0)
            {
                try
                {
                    pageTotal = int.Parse(pagerNodes.AsString().GetRegexBegEnd("/", "页").GetReplace("("));
                }
                catch { }
            }

            for (int pageNo = 1; pageNo <= pageTotal; pageNo++)
            {
                if (pageNo > 1)
                {
                    try
                    {
                        listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "/p/" + pageNo + ".html");
                    }
                    catch { continue; }
                }

                // List entries are the <li> elements below <ul class="list">.
                AndFilter listFilter = new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list"));
                AndFilter itemFilter = new AndFilter(new HasParentFilter(listFilter, true), new TagNameFilter("li"));
                NodeList  items      = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(itemFilter);
                if (items == null || items.Count == 0)
                {
                    continue;
                }

                for (int idx = 0; idx < items.Count; idx++)
                {
                    // Fields that this site does not provide stay empty.
                    string itemAddress = string.Empty, buildUnit = string.Empty, buildNature = string.Empty,
                           planInvest = string.Empty, issuedPlan = string.Empty, investSource = string.Empty,
                           approvalUnit = string.Empty, approvalDate = string.Empty, approvalCode = string.Empty,
                           msgUnit = string.Empty, planBeginDate = string.Empty, planEndDate = string.Empty,
                           itemContent = string.Empty;

                    INode item      = items[idx];
                    ATag  titleLink = item.GetATag();
                    if (titleLink == null)
                    {
                        continue;
                    }

                    string itemName = titleLink.GetAttribute("title").GetReplace("甘肃省发展和改革委员会");
                    string planDate = item.ToPlainTextString().GetDateRegex();
                    string infoUrl  = "http://www.gsdrc.gov.cn" + titleLink.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                    }
                    catch { continue; }

                    NodeList contentNodes = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                    if (contentNodes == null || contentNodes.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml     = contentNodes.AsHtml();
                    string itemCtx     = ctxHtml.ToCtxString();
                    string totalInvest = itemCtx.GetRegexBegEnd("总投资", "万元");
                    string itemCode    = itemCtx.GetRegex("项目编码");
                    string planType    = "项目审批与核准";
                    string msgType     = "甘肃省发展和改革委员会";

                    ItemPlan plan = ToolDb.GenItemPlan("甘肃省", "甘肃省及地市", "", itemCode, itemName, itemAddress, buildUnit, buildNature, totalInvest, planInvest, issuedPlan, investSource, approvalUnit, approvalDate, approvalCode, msgUnit, planDate, planType, planBeginDate, planEndDate, ctxHtml, itemCtx, itemContent, msgType, infoUrl);
                    records.Add(plan);

                    // Collect attachment links from the article body.
                    NodeList anchors = new Parser(new Lexer(ctxHtml)).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int n = 0; n < anchors.Count; n++)
                        {
                            ATag anchor = anchors[n] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that already contain "http" are used as they are; others are
                            // prefixed with the site root.
                            string link = anchor.Link.ToLower().Contains("http")
                                ? anchor.Link
                                : "http://www.gsdrc.gov.cn/" + anchor.Link.GetReplace("../,./");
                            base.AttachList.Add(ToolDb.GenBaseAttach(anchor.LinkText, plan.Id, link));
                        }
                    }

                    if (!crawlAll && records.Count >= this.MaxCount)
                    {
                        return records;
                    }
                }
            }
            return records;
        }
Exemplo n.º 25
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8, ref cookiestr);
            }
            catch (Exception ex)
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("script"), new HasAttributeFilter("type", "text/javascript")));
            string   b        = pageNode.AsString().GetCtxBr();
            string   c        = b.Replace("('", "徐鑫").Replace("')", "凯德");

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = c.GetRegexBegEnd("徐鑫", "凯德");
                    page = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= page; i++)
            {
                if (i >= 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "fcInfotitle",
                                       "currentPage" },
                        new string[] {
                        "",
                        i.ToString()
                    }
                        );
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl("https://www.dgzb.com.cn/ggzy/website/WebPagesManagement/findListByPage?fcInfotype=1&tenderkind=A&projecttendersite=SS&orderFiled=fcInfoenddate&orderValue=desc", nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }
                JavaScriptSerializer        serializer  = new JavaScriptSerializer();
                Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(htl);
                foreach (KeyValuePair <string, object> obj in smsTypeJson)
                {
                    object[] array = (object[])obj.Value;

                    foreach (object arrValue in array)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                        code      = Convert.ToString(dic["fcTendersn"]);
                        prjName   = Convert.ToString(dic["fcInfotitle"]);
                        beginDate = Convert.ToString(dic["fcInfostartdate"]).GetDateRegex("yyyy-MM-dd");

                        string xu = Convert.ToString(dic["id"]);
                        InfoUrl = "https://www.dgzb.com.cn/ggzy/website/WebPagesManagement/jsdetail?publishId=" + xu + "&fcInfotype=1";
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8).Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail")));
                        if (dtnode.Count > 0 && dtnode != null)
                        {
                            HtmlTxt = dtnode.AsHtml();

                            inviteCtx = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();

                            prjAddress = inviteCtx.GetRegexBegEnd("工程地址:", "\r");
                            buildUnit  = inviteCtx.GetRegexBegEnd("建设单位:", "\r");

                            msgType  = "东莞市建设工程交易中心";
                            specType = "建设工程";
                            Regex regoType = new Regex(@"工程类型(:|:)[^\r\n]+\r\n");
                            otherType = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                            inviteCtx = inviteCtx.Replace("ctl00_cph_context_span_MetContent", "").Replace("<span id=", "").Replace("</span>", "").Replace(">", "").Trim();
                            if (buildUnit == "")
                            {
                                buildUnit = "见招标信息";
                            }
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "",
                                                                   string.Empty, code, prjName, prjAddress, buildUnit,
                                                                   beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);//附件搜索
                            parserdetail.Reset();
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aTagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aTagNode != null && aTagNode.Count > 0)
                            {
                                for (int k = 0; k < aTagNode.Count; k++)
                                {
                                    ATag aTag = aTagNode[k].GetATag();
                                    if (aTag.IsAtagAttach())
                                    {
                                        string linkurl = aTag.Link;
                                        linkurl = linkurl.Replace("&amp;", "&");
                                        string cc = string.Empty;
                                        string aa = linkurl.GetRegexBegEnd("&", "id");
                                        if (aa == "")
                                        {
                                            cc = linkurl;
                                        }
                                        else
                                        {
                                            cc = linkurl.Replace(aa, "");
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(aTag.LinkText, info.Id, cc);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }


                //for (int i = 1; i < page; i++)
                //{
                //    if (i > 1)
                //    {
                //        viewState = this.ToolWebSite.GetAspNetViewState(htl);
                //        eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                //        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[]{
                //            "__EVENTTARGET",
                //            "__EVENTARGUMENT",
                //            "__LASTFOCUS",
                //            "__VIEWSTATE",
                //            "__EVENTVALIDATION",
                //            "ctl00$cph_context$drp_selSeach",
                //            "ctl00$cph_context$txt_strWhere",
                //            "ctl00$cph_context$drp_Rq",
                //            "ctl00$cph_context$GridViewPaingTwo1$txtGridViewPagingForwardTo",
                //            "ctl00$cph_context$GridViewPaingTwo1$btnNext.x",
                //            "ctl00$cph_context$GridViewPaingTwo1$btnNext.y"
                //        }, new string[]{
                //            string.Empty,
                //            string.Empty,
                //            string.Empty,
                //            viewState,
                //            eventValidation,
                //            "1",
                //            string.Empty,
                //            "3",
                //            (i-1).ToString(),
                //            "8",
                //            "10"
                //        });
                //        try
                //        {
                //            htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                //        }
                //        catch (Exception ex) { continue; }
                //    }
                //    parser = new Parser(new Lexer(htl));
                //    NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_GridView1")));
                //    if (tableNodeList != null && tableNodeList.Count > 0)
                //    {
                //        TableTag table = (TableTag)tableNodeList[0];
                //        for (int j = 1; j < table.RowCount; j++)
                //        {
                //            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                //                prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                //                specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                //                remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                //                CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                //            TableRow tr = table.Rows[j];
                //            code = tr.Columns[1].ToPlainTextString().Trim();
                //            prjName = tr.Columns[2].ToPlainTextString().Trim();
                //            beginDate = tr.Columns[4].ToPlainTextString().Trim().GetReplace(" - ", "&").Split('&')[0].Trim();
                //            try
                //            {
                //                endDate = tr.Columns[4].ToPlainTextString().Trim().GetReplace(" - ", "&").Split('&')[1].Trim();
                //            }
                //            catch { }
                //            ATag aTag = tr.Columns[2].SearchFor(typeof(ATag), true)[0] as ATag;
                //            InfoUrl = "http://www.dgzb.com.cn:8080/dgjyweb/sitemanage/" + aTag.Link.Replace("amp;", "").Trim();
                //            string htmldetail = string.Empty;
                //            try
                //            {
                //                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8).Replace("&nbsp;", "");
                //            }
                //            catch (Exception)
                //            {
                //                continue;
                //            }
                //            Parser parserdetail = new Parser(new Lexer(htmldetail));
                //            NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_span_MetContent")));
                //            if (dtnode.Count > 0 && dtnode != null)
                //            {
                //                HtmlTxt = dtnode.AsHtml();
                //                inviteCtx = dtnode.ToHtml().Replace("<br/>", "\r\n");
                //                Regex regBuidUnit = new Regex(@"建设单位:[^\r\n]+\r\n");
                //                buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("建设单位:", "").Replace(":", "").Trim();
                //                Regex regPrjAddr = new Regex(@"(工程地点|工程地址)(:|:)[^\r\n]+\r\n");
                //                prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址", "").Replace(":", "").Trim();
                //                msgType = "东莞市建设工程交易中心";
                //                specType = "建设工程";
                //                Regex regoType = new Regex(@"工程类型(:|:)[^\r\n]+\r\n");
                //                otherType = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                //                inviteCtx = inviteCtx.Replace("ctl00_cph_context_span_MetContent", "").Replace("<span id=", "").Replace("</span>", "").Replace(">", "").Trim();
                //                if (buildUnit == "")
                //                {
                //                    buildUnit = "见招标信息";
                //                }
                //                inviteType = ToolHtml.GetInviteTypes(prjName);
                //                InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "",
                //                    string.Empty, code, prjName, prjAddress, buildUnit,
                //                    beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                //                list.Add(info);//附件搜索
                //                parserdetail.Reset();
                //                NodeList fileNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_DownLoadFiles1_GridView2")));
                //                if (fileNode != null && fileNode.Count > 0)
                //                {
                //                    string iii = fileNode.AsString().Trim();
                //                    TableTag tablefile = (TableTag)fileNode[0];
                //                    for (int k = 1; k < tablefile.RowCount; k++)
                //                    {
                //                        string fileName = string.Empty, fileUrl = string.Empty;
                //                        TableRow trfile = tablefile.Rows[k];
                //                        if (trfile.Columns[1].ToPlainTextString().Trim() != "")
                //                        {
                //                            ATag aTagfile = trfile.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                //                            fileName = trfile.Columns[1].ToPlainTextString().Trim();
                //                            fileUrl = "http://www.dgzb.com.cn/dgjyweb/sitemanage/" + aTagfile.Link.Replace("amp;", "").Trim();
                //                            BaseAttach attach = ToolDb.GenBaseAttach(fileName, info.Id, fileUrl);
                //                            base.AttachList.Add(attach);
                //                        }
                //                    }
                //                }
                //                parserdetail.Reset();//补充文件搜索
                //                NodeList fileBuChongNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_BuChongFileDown1_GridView2")));
                //                if (fileBuChongNode != null && fileBuChongNode.Count > 0)
                //                {
                //                    string iii = fileBuChongNode.AsString().Trim();
                //                    TableTag tableBuChongfile = (TableTag)fileBuChongNode[0];
                //                    for (int k = 1; k < tableBuChongfile.RowCount; k++)
                //                    {
                //                        string fileName = string.Empty, fileUrl = string.Empty;
                //                        TableRow trfileBuChong = tableBuChongfile.Rows[k];
                //                        if (trfileBuChong.Columns[1].ToPlainTextString().Trim() != "")
                //                        {
                //                            ATag aTagfile = trfileBuChong.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                //                            fileName = trfileBuChong.Columns[1].ToPlainTextString().Trim();
                //                            fileUrl = "http://www.dgzb.com.cn/dgjyweb/sitemanage/" + aTagfile.Link.Replace("amp;", "").Trim();
                //                            BaseAttach attach = ToolDb.GenBaseAttach(fileName, info.Id, fileUrl);
                //                            base.AttachList.Add(attach);
                //                        }
                //                    }
                //                }
                //                if (!crawlAll && list.Count >= this.MaxCount) return list;
                //            }
                //        }
                //    }
            }
            return(null);
        }
Exemplo n.º 26
0
        /// <summary>
        /// Crawls the tender-notice listing at <c>SiteUrl</c> page by page, downloads each
        /// notice's detail page and builds one <c>InviteInfo</c> for every notice whose detail
        /// page contains the table with id <c>Table2</c>. Attachment links found inside that
        /// table are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "mvcPager")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                // Deliberate best effort: the page count is read from the second-to-last pager
                // link. If that link is missing or its href is not numeric, only page 1 is crawled.
                try
                {
                    string temp = pageNode[pageNode.Count - 2].GetATagHref().GetReplace("/Front/Zbgg/System.Web.Mvc.UrlParameter/");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "/System.Web.Mvc.UrlParameter/" + i);
                    }
                    catch { continue; } // skip a listing page that cannot be downloaded
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "div_Li1")), true), new TagNameFilter("table")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    // The matched node is not a table tag; nothing to read on this page.
                    continue;
                }
                // Row 0 is skipped (j starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    var cols = table.Rows[j].Columns;
                    if (cols == null || cols.Length < 4)
                    {
                        // Columns 0, 1 and 3 are read below; a shorter row cannot be parsed.
                        continue;
                    }
                    ATag aTag = cols[0].GetATag();
                    if (aTag == null)
                    {
                        // No link in the first column means there is no detail page to follow.
                        continue;
                    }
                    prjName   = aTag.GetAttribute("title");
                    buildUnit = cols[1].GetATagValue("title");
                    beginDate = cols[3].ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://www.fjjsjy.com" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; } // skip a notice whose detail page cannot be downloaded
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table2")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt    = dtlNode.AsHtml();
                    inviteCtx  = HtmlTxt.GetReplace("<br/>,<br />,<br>", "\r\n").ToCtxString();
                    prjAddress = inviteCtx.GetAddressRegex().GetCodeDel();

                    code     = inviteCtx.GetCodeRegex().GetCodeDel();
                    msgType  = "福建省建设工程交易中心";
                    specType = inviteType = "建设工程";
                    InviteInfo info = ToolDb.GenInviteInfo("福建省", "福建省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail table.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // Links that already contain "http" are used as-is; others are
                            // rebased onto the site root after dropping "../" and "./".
                            string link = a.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0
                                ? a.Link
                                : "http://www.fjjsjy.com/" + a.Link.GetReplace("../,./");
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Exemplo n.º 27
0
        /// <summary>
        /// Crawls the bid-result grid (<c>ctl00_Content_GridView1</c>) at <c>SiteUrl</c>.
        /// Pages after the first are requested by posting the ASP.NET view state and event
        /// validation values taken from the previously fetched page. For every data row the
        /// detail page is downloaded, a <c>BidInfo</c> is built from the row and the detail
        /// text, and attachment links under the <c>trFujian</c> row are appended to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();
            // Page count; stays 1 when the pager text cannot be parsed.
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser parser = new Parser(new Lexer(html));

            NodeList sNode      = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));
            string   pageString = sNode != null ? sNode.AsString() : string.Empty;
            Regex    regexPage  = new Regex(@",共[^页]+页,");
            Match    pageMatch  = regexPage.Match(pageString);

            // A failed match yields an empty Value, which TryParse rejects, leaving pageInt at 1.
            int parsedPages;
            if (int.TryParse(pageMatch.Value.Replace(",共", "").Replace("页,", "").Trim(), out parsedPages))
            {
                pageInt = parsedPages;
            }

            // These patterns do not depend on the row, so they are built once.
            Regex regPrjAdd  = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
            Regex regprjMgr  = new Regex(@"(项目经理|项目负责人|项目总监|建造师|监理师|项目经理姓名)(:|:)[^\s]+[\s]{1}");
            Regex regInvType = new Regex(@"[^\r\n]+[\r\n]{1}");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "ctl00$hdnPageCount" }, new string[] { "ctl00$Content$GridView1", "Page$" + i.ToString(), viewState, "", eventValidation, pageInt.ToString() });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; } // skip a listing page that cannot be downloaded
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }
                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                // The first and the last row of the grid are not read as data rows.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, prjMgr = string.Empty, HtmlTxt = string.Empty;
                    var cols = table.Rows[j].Columns;
                    if (cols == null || cols.Length < 7)
                    {
                        // Columns 1 through 6 are read below; a shorter row cannot be parsed.
                        continue;
                    }
                    code      = cols[1].ToPlainTextString().Trim();
                    prjName   = cols[2].ToPlainTextString().Trim();
                    buildUnit = cols[4].ToPlainTextString().Trim();
                    bidUnit   = cols[5].ToPlainTextString().Trim();
                    bidMoney  = cols[6].ToPlainTextString().Replace("万元", "").Trim();

                    // Column 3 holds "<begin>至<end>"; a cell without the separator yields
                    // only a begin date instead of an IndexOutOfRangeException.
                    string[] dateParts = cols[3].ToPlainTextString().Split('至');
                    beginDate = dateParts[0].Replace("年", "-").Replace("月", "-").Replace("日", " ").Replace("时", "").Trim();
                    if (dateParts.Length > 1)
                    {
                        endDate = dateParts[1].Replace("年", "-").Replace("月", "-").Replace("日", " ").Replace("时", "").Trim();
                    }

                    NodeList nameChildren = cols[2].Children;
                    ATag     aTag         = (nameChildren != null && nameChildren.Count > 0) ? nameChildren[0] as ATag : null;
                    if (aTag == null)
                    {
                        // No link as first child of the name cell: there is no detail page to follow.
                        continue;
                    }
                    InfoUrl = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;

                    string htmldetail = string.Empty;
                    try
                    {
                        // Download the detail page once and derive both variants from it:
                        // the raw HTML of the content span, and a copy with <br> turned into
                        // line breaks for text extraction.
                        string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");
                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                    }
                    catch (Exception) { continue; } // skip a notice whose detail page cannot be read
                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));

                    bidCtx = dtnode.AsString().Replace(" ", "");
                    // NOTE(review): prjAddress is extracted but not passed to GenBidInfo below — confirm whether that is intended.
                    prjAddress = regPrjAdd.Match(bidCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
                    msgType    = "深圳市建设工程交易中心";
                    specType   = "建设工程";
                    prjMgr = regprjMgr.Match(bidCtx).Value.Replace("项目经理姓名", "").Replace("项目经理", "").Replace("项目总监", "").Replace("建造师", "").Replace("项目负责人", "").Replace(":", "").Replace(":", "").Replace("监理师", "").Trim();

                    // A bid unit found in the detail text takes precedence over the grid column.
                    string bidUnitInfo = bidCtx.GetBidRegex();
                    if (!string.IsNullOrEmpty(bidUnitInfo))
                    {
                        bidUnit = bidUnitInfo;
                    }

                    // The first line of the detail text is used to classify the bid type.
                    string InvType = regInvType.Match(bidCtx).Value;

                    prjName = ToolDb.GetPrjName(prjName);
                    if (!string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = ToolDb.GetBidUnit(bidUnit);
                        if (bidUnit.Contains("报价"))
                        {
                            bidUnit = bidUnit.Remove(bidUnit.IndexOf("报价"));
                        }
                    }
                    bidType = ToolHtml.GetInviteTypes(InvType);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳市工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, string.Empty, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the row with id "trFujian".
                    dtlparser.Reset();
                    NodeList dlNodes = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "trFujian"), new TagNameFilter("tr")));
                    if (dlNodes != null && dlNodes.Count > 0)
                    {
                        TableRow attr      = dlNodes[0] as TableRow;
                        NodeList fileNodes = attr != null ? attr.SearchFor(typeof(ATag), true) : null;
                        if (fileNodes != null && fileNodes.Count > 0)
                        {
                            for (int f = 0; f < fileNodes.Count; f++)
                            {
                                ATag fileTag = fileNodes[f] as ATag;
                                if (fileTag != null && !string.IsNullOrEmpty(fileTag.Link))
                                {
                                    BaseAttach attach = ToolDb.GenBaseAttach(fileTag.StringText, info.Id, fileTag.Link.Replace("..", "http://www.szjsjy.com.cn"));
                                    base.AttachList.Add(attach);
                                }
                            }
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Exemplo n.º 28
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <BidInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList noList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "Top10 TxtCenter")));

            if (noList != null && noList.Count > 0)
            {
                string temp = noList.AsString().GetRegexBegEnd("/", "页");
                try
                {
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 10; }
            }
            else
            {
                pageInt = 10;
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.bidding.csg.cn/zbhxrgs/index_" + i.ToString() + ".jhtml", Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "W750 Right")), true), new TagNameFilter("li")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    for (int j = 1; j < nodeList.Count; j++)
                    {
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               bidDate = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjAddress = string.Empty, remark = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        ATag aTag = nodeList[j].GetATag();
                        prjName   = aTag.LinkText;
                        beginDate = nodeList[j].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.bidding.csg.cn" + aTag.Link;
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch
                        {
                            continue;
                        }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "Center W1000")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.ToCtxString();
                            parser  = new Parser(new Lexer(HtmlTxt));
                            NodeList nameNode = parser.ExtractAllNodesThatMatch(new AndFilter(new
                                                                                              TagNameFilter("h1"), new HasAttributeFilter("class", "TxtCenter Padding10")));
                            if (nameNode != null && nameNode.Count > 0)
                            {
                                prjName = nameNode[0].ToNodePlainString();
                            }
                            bidType    = prjName.GetInviteBidType();
                            buildUnit  = bidCtx.GetBuildRegex();
                            prjAddress = bidCtx.GetAddressRegex();
                            code       = bidCtx.GetCodeRegex().GetCodeDel();
                            bidUnit    = bidCtx.GetBidRegex();
                            bidMoney   = bidCtx.GetMoneyRegex();
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegexBegEnd("公开询价确定", "成交单位");
                            }
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegex("成交人,拟定采购单位,成交候选人,第一推荐成交候选人,第一");
                            }
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegexBegEnd("签约单位为", "。");
                            }
                            if (string.IsNullOrWhiteSpace(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegexBegEnd("第一入围候选人", ",");
                            }
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                                if (tableNode != null && tableNode.Count > 0)
                                {
                                    TableTag tableDtl = tableNode[0] as TableTag;
                                    string   ctx      = string.Empty;
                                    for (int k = 1; k < tableDtl.RowCount; k++)
                                    {
                                        try
                                        {
                                            ctx += tableDtl.Rows[k].Columns[0].ToNodePlainString().Replace("单位名称", "中标单位").Replace("中标候选人", "中标单位") + ":";
                                            ctx += tableDtl.Rows[k].Columns[1].ToNodePlainString() + "\r\n";
                                        }
                                        catch { }
                                    }
                                    bidUnit = ctx.GetReplace("中标单位:第一").GetBidRegex();
                                    if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                                    {
                                        bidMoney = ctx.GetMoneyRegex();
                                    }
                                    prjMgr = ctx.GetRegex("项目经理姓名及资质证书编号");
                                    if (prjMgr.IndexOf("/") > 0)
                                    {
                                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("/"));
                                    }
                                    if (string.IsNullOrEmpty(bidUnit) || string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                    {
                                        ctx = string.Empty;
                                        for (int k = 0; k < tableDtl.RowCount; k++)
                                        {
                                            try
                                            {
                                                for (int d = 0; d < tableDtl.Rows[k].ColumnCount; d++)
                                                {
                                                    ctx += tableDtl.Rows[k].Columns[d].ToNodePlainString().Replace("单位名称", "中标单位").Replace("中标侯选人", "中标单位") + ":";
                                                    ctx += tableDtl.Rows[k + 1].Columns[d].ToNodePlainString() + "\r\n";
                                                }
                                            }
                                            catch { }
                                        }
                                        bidUnit = ctx.GetBidRegex();
                                        if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                                        {
                                            bidMoney = ctx.GetMoneyRegex();
                                        }
                                        prjMgr = ctx.GetRegex("项目经理姓名及资质证书编号");
                                    }
                                }
                            }
                            if (string.IsNullOrEmpty(prjAddress))
                            {
                                prjAddress = "见中标信息";
                            }
                            specType = "其他";
                            msgType  = "中国南方电网有限责任公司招标服务中心";
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = "中国南方电网有限责任公司招标服务中心";
                            }
                            bidUnit = bidUnit.GetReplace(":");
                            BidInfo info = ToolDb.GenBidInfo("广东省", "电网专项工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                             bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList nodeAtag = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (nodeAtag != null && nodeAtag.Count > 0)
                            {
                                for (int c = 0; c < nodeAtag.Count; c++)
                                {
                                    ATag a = nodeAtag[c] as ATag;
                                    if (a.Link.IsAtagAttach())
                                    {
                                        string alink = "http://www.bidding.csg.cn/" + a.Link;
                                        try
                                        {
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText.Replace("&nbsp", ""), info.Id, alink);
                                            base.AttachList.Add(attach);
                                        }
                                        catch { }
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Exemplo n.º 29
0
        /// <summary>
        /// Crawls the paginated bid-result list at <c>SiteUrl</c>. For every list row it
        /// downloads the detail page, follows the first iframe found on it, and builds a
        /// <c>BidInfo</c> from the <c>table</c> with class <c>newtalbe_c</c> inside that
        /// iframe document. Anchors in the iframe document whose link text passes
        /// <c>IsAtagAttach</c> are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected items
        /// reaches <c>MaxCount</c>; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> items. The list is empty when the first page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <BidInfo>();
            int    pageInt = 15; // fallback page count, also used when the pager cannot be parsed
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception)
            {
                return(list);
            }

            // Determine the page count: the number between "(" and ")" in the href of
            // the last anchor inside div.scott.
            Parser   parser = new Parser(new Lexer(html));
            NodeList aNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott"))), new TagNameFilter("a")));

            if (aNodes != null && aNodes.Count > 0)
            {
                try
                {
                    string temp = aNodes.GetATagHref(aNodes.Count - 1);
                    pageInt = Convert.ToInt32(temp.GetRegexBegEnd("(", ")"));
                }
                catch
                {
                    pageInt = 15;
                }
            }

            // Read the data page by page.
            for (int page = 1; page <= pageInt; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        // Later pages are requested by posting the hidden form values
                        // of the most recently downloaded page plus the page number.
                        string typeId           = html.GetInputValue("typeId");
                        string boardId          = html.GetInputValue("boardId");
                        string totalRows        = html.GetInputValue("totalRows");
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "typeId", "boardId", "newstitle", "sTime", "eTime", "totalRows", "pageNO"
                        }, new string[] {
                            typeId, boardId, string.Empty, string.Empty, string.Empty, totalRows, page.ToString()
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and last rows are skipped (presumably header and pager rows
                // -- TODO confirm against the live page).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // A short row cannot carry title (column 1) and date (column 2);
                        // skip it instead of letting an index error abort the whole crawl.
                        continue;
                    }

                    string prjName   = tr.Columns[1].ToNodePlainString();
                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = tr.GetATagHref();

                    string htlDtl = string.Empty;
                    try
                    {
                        // The detail page only hosts an iframe; the real content is the iframe document.
                        htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList  ifrm   = parser.ExtractAllNodesThatMatch(new TagNameFilter("iframe"));
                        IFrameTag iframe = ifrm.SearchFor(typeof(IFrameTag), true)[0] as IFrameTag;
                        htlDtl = this.ToolWebSite.GetHtmlByUrl(iframe.GetAttribute("src").Replace("/zsweb/..", ""), Encoding.Default);
                    }
                    catch { Logger.Error("BidZhongshan"); continue; }

                    // Turn header cells into data cells so they show up in TableRow.Columns.
                    // Only <th ...> / </th> tags are rewritten; a plain string replace of "th"
                    // would also mangle attributes and text such as "width" or "thead".
                    string normalizedDtl = System.Text.RegularExpressions.Regex.Replace(
                        htlDtl, @"<(/?)th\b", "<$1td", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                    parser = new Parser(new Lexer(normalizedDtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "newtalbe_c")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    TableTag tab = dtlList[0] as TableTag;
                    if (tab == null)
                    {
                        continue;
                    }

                    string HtmlTxt = dtlList.AsHtml();
                    string bidCtx  = HtmlTxt.ToCtxString();

                    // Flatten the table into "label:value" lines: cells at even column
                    // indexes are followed by ":", cells at odd indexes end the line.
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int k = 0; k < tab.RowCount; k++)
                    {
                        TableRow dtlRow = tab.Rows[k];
                        for (int d = 0; d < dtlRow.ColumnCount; d++)
                        {
                            ctxBuilder.Append(dtlRow.Columns[d].ToNodePlainString());
                            ctxBuilder.Append((d + 1) % 2 == 0 ? "\r\n" : ":");
                        }
                    }
                    string ctx = ctxBuilder.ToString();

                    string code      = htlDtl.ToCtxString().GetCodeRegex().Replace("[", "").Replace("]", "");
                    string buildUnit = ctx.GetBuildRegex();
                    string bidUnit   = ctx.GetBidRegex();
                    string bidMoney  = ctx.GetMoneyRegex();
                    string bidType   = prjName.GetInviteBidType();
                    string msgType   = "中山市住房和城乡建设局";
                    string specType  = "建设工程";
                    // Not extracted for this site; passed through as empty strings.
                    string endDate   = string.Empty;
                    string otherType = string.Empty;
                    string prjMgr    = string.Empty;

                    BidInfo info = ToolDb.GenBidInfo("广东省", "中山市区", string.Empty, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                     bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Attachments are collected from the unmodified iframe document.
                    parser = new Parser(new Lexer(htlDtl));
                    NodeList aList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aList != null && aList.Count > 0)
                    {
                        for (int c = 0; c < aList.Count; c++)
                        {
                            ATag a = aList[c] as ATag;
                            if (a == null)
                            {
                                continue;
                            }
                            if (a.LinkText.IsAtagAttach())
                            {
                                string     alink  = a.Link;
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText.Replace("&nbsp", "").Replace(";", ""), info.Id, alink);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Exemplo n.º 30
0
        /// <summary>
        /// Crawls the paginated notice list starting at <c>SiteUrl</c> and classifies each
        /// notice by the <c>h3</c> title of its detail page. Titles containing
        /// "招标公告" or "补充公告" produce an <c>InviteInfo</c>; titles containing
        /// "中标结果", "结果公示" or "中标公示" produce a <c>BidInfo</c>; all other
        /// notices are skipped. Attachment anchors found in the notice body are appended
        /// to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the number of collected items
        /// reaches <c>MaxCount</c>; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// A list holding a mix of <c>InviteInfo</c> and <c>BidInfo</c> items. The list is
        /// empty when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ecms_pagination")), true), new TagNameFilter("a")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The page count is read from the text of the second-to-last pager link.
                    ATag atag = pageList[pageList.Count - 2] as ATag;
                    pageInt = int.Parse(atag.LinkText);
                }
                catch
                { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.shcac.edu.cn:80/html/xxdt/tzgg/" + i.ToString() + ".html", Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_main_content")), true), new TagNameFilter("ul")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    ATag aTag = nodeList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link has no detail page to crawl.
                        continue;
                    }

                    // Only the date is taken from the list row; the title always comes
                    // from the detail page's h3 below.
                    string btTime = nodeList[j].ToNodePlainString().GetDateRegex();
                    string btUrl  = aTag.Link;

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(btUrl, Encoding.UTF8);
                        htldtl = htldtl.GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlBt = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail_main_content")), true), new TagNameFilter("h3")));
                    if (dtlBt == null || dtlBt.Count == 0)
                    {
                        continue;
                    }

                    string btName   = dtlBt.AsString();
                    bool   isInvite = btName.Contains("招标公告") || btName.Contains("补充公告");
                    bool   isBid    = !isInvite && (btName.Contains("中标结果") || btName.Contains("结果公示") || btName.Contains("中标公示"));
                    if (!isInvite && !isBid)
                    {
                        continue;
                    }

                    // Rewind the detail-page parser before the second extraction pass.
                    parser.Reset();
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("style", "line-height:22px;")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    // Values shared by both notice kinds.
                    string prjName   = btName;
                    string beginDate = btTime;
                    string InfoUrl   = btUrl;
                    string HtmlTxt   = dtlList.ToHtml();
                    string ctx       = HtmlTxt.Replace("</p>", "\r\n").ToCtxString().Replace("\r\n\t", "\r\n").Replace("\r\n\r\n", "\r\n");
                    string msgType   = "上海民航职业技术学院";
                    string specType  = "";
                    // Not extracted for this site; passed through as empty strings.
                    string endDate   = string.Empty;
                    string otherType = string.Empty;

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList attachNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));

                    if (isInvite)
                    {
                        string code       = string.Empty;
                        string remark     = string.Empty;
                        string inviteType = string.Empty;
                        string buildUnit  = ctx.GetBuildRegex();
                        string prjAddress = ctx.GetAddressRegex();

                        InviteInfo info = ToolDb.GenInviteInfo("上海市", "上海市区", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);

                        if (attachNodes != null && attachNodes.Count > 0)
                        {
                            for (int k = 0; k < attachNodes.Count; k++)
                            {
                                ATag aFile = attachNodes[k] as ATag;
                                if (aFile == null || !aFile.IsAtagAttach())
                                {
                                    continue;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, ResolveAttachLink(InfoUrl, aFile.Link));
                                base.AttachList.Add(attach);
                            }
                        }
                    }
                    else
                    {
                        string prjMgr  = string.Empty;
                        string bidUnit = ctx.GetBidRegex();
                        if (string.IsNullOrWhiteSpace(bidUnit))
                        {
                            bidUnit = ctx.GetRegex("中标人");
                        }
                        string bidMoney  = ctx.GetMoneyRegex();
                        string buildUnit = ctx.GetBuildRegex();
                        if (string.IsNullOrWhiteSpace(buildUnit))
                        {
                            buildUnit = ctx.GetRegex("招标人");
                        }

                        // A project code is only kept when it ends with '号'.
                        string code = ctx.GetCodeRegex().GetCodeDel();
                        if (!string.IsNullOrWhiteSpace(code) && code[code.Length - 1] != '号')
                        {
                            code = "";
                        }

                        // Cut the winning bidder off right after the first "公司".
                        if (!string.IsNullOrEmpty(bidUnit))
                        {
                            int companyIdx = bidUnit.IndexOf("公司", StringComparison.Ordinal);
                            if (companyIdx >= 0)
                            {
                                bidUnit = bidUnit.Substring(0, companyIdx) + "公司";
                            }
                        }
                        string bidType = ToolHtml.GetInviteTypes(prjName);

                        BidInfo info = ToolDb.GenBidInfo("上海市", "上海市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, ctx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                        list.Add(info);

                        if (attachNodes != null && attachNodes.Count > 0)
                        {
                            for (int k = 0; k < attachNodes.Count; k++)
                            {
                                ATag aFile = attachNodes[k] as ATag;
                                if (aFile == null || !aFile.IsAtagAttach())
                                {
                                    continue;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, ResolveAttachLink(InfoUrl, aFile.Link));
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }

        /// <summary>
        /// Returns a usable URL for an attachment link found on <paramref name="pageUrl"/>.
        /// Links that already contain "http" are returned unchanged; other links are
        /// resolved against the page URL. When resolution is not possible the original
        /// link is returned.
        /// </summary>
        /// <param name="pageUrl">URL of the page the link was found on.</param>
        /// <param name="link">The raw link value taken from the anchor tag.</param>
        /// <returns>The unchanged or resolved link.</returns>
        private static string ResolveAttachLink(string pageUrl, string link)
        {
            if (string.IsNullOrEmpty(link) || link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return link;
            }

            Uri pageUri;
            Uri resolved;
            if (Uri.TryCreate(pageUrl, UriKind.Absolute, out pageUri) && Uri.TryCreate(pageUri, link, out resolved))
            {
                return resolved.AbsoluteUri;
            }
            return link;
        }