コード例 #1
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c> and builds a <c>NoticeInfo</c> for every
        /// list row whose detail page contains a <c>table</c> element with <c>width="750"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> notices have been collected.
        /// </param>
        /// <returns>
        /// The notices collected so far; an empty list when the first list page cannot be downloaded.
        /// Attachment links found in the detail pages are added to <c>base.AttachList</c> as a side effect.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList notices = new ArrayList();
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // The first page could not be fetched, so there is nothing to crawl.
                return notices;
            }

            // The page count is read from the last cell of the first <tr valign="top"> row.
            int pageCount = 1;
            Parser parser = new Parser(new Lexer(html));
            NodeList pagerRows = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("valign", "top")));
            if (pagerRows != null && pagerRows.Count > 0)
            {
                try
                {
                    TableRow pagerRow = pagerRows[0] as TableRow;
                    string pagerText = pagerRow.Columns[pagerRow.ColumnCount - 1].ToNodePlainString();
                    // NOTE(review): only the final character is parsed, so a page count of 10 or
                    // more cannot be read from this text - confirm against the live pager markup.
                    pagerText = pagerText.Substring(pagerText.Length - 1, 1);
                    pageCount = int.Parse(pagerText.Replace("(", ""));
                }
                catch
                {
                    // Any parsing problem (including an unexpected node type) falls back to one page.
                    pageCount = 1;
                }
            }

            for (int page = 1; page <= pageCount; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        // Later pages are requested by posting the GridView1 paging event together
                        // with the hidden form fields taken from the page currently held in html.
                        string viewState = this.ToolWebSite.GetAspNetViewState(html);
                        string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                        NameValueCollection form = this.ToolWebSite.GetNameValueCollection(
                            new string[]
                            {
                                "__EVENTTARGET",
                                "__EVENTARGUMENT",
                                "__VIEWSTATE",
                                "__VIEWSTATEENCRYPTED",
                                "__EVENTVALIDATION",
                                "sel",
                                "beginDate",
                                "endDate",
                                "infotitle"
                            },
                            new string[]
                            {
                                "GridView1",
                                "Page$" + page.ToString(),
                                viewState,
                                "",
                                eventValidation,
                                "1",
                                "",
                                "",
                                ""
                            });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, form, Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip a page that cannot be fetched and try the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList grids = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                if (grids == null || grids.Count == 0)
                {
                    continue;
                }

                TableTag grid = grids[0] as TableTag;
                if (grid == null)
                {
                    // The matched node is not a table tag; previously this caused a
                    // NullReferenceException that aborted the whole crawl.
                    continue;
                }

                // The first and the last row of the grid are not treated as notice rows.
                for (int row = 1; row < grid.RowCount - 1; row++)
                {
                    TableRow tr = grid.Rows[row];
                    if (tr == null || tr.ColumnCount < 4)
                    {
                        // Columns[1] and Columns[3] are read below; a shorter row would throw and
                        // discard every notice collected so far.
                        continue;
                    }

                    string infoType = "公告公示";
                    string infoTitle = tr.Columns[1].ToNodePlainString();
                    string publishTime = tr.Columns[3].ToPlainTextString();
                    string infoUrl = "http://www.szjsjy.com.cn/Notify/" + tr.Columns[1].GetATagHref();

                    string detailHtml;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8);
                        detailHtml = detailHtml.GetJsString();
                    }
                    catch
                    {
                        // Skip a notice whose detail page cannot be fetched.
                        continue;
                    }

                    // The entity is created with empty content; InfoCtx and CtxHtml are filled in
                    // below once the detail table has been located.
                    NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "深圳市工程", string.Empty, string.Empty, infoTitle, infoType, string.Empty, publishTime, string.Empty, MsgTypeCosnt.ShenZhenMsgType, infoUrl, string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, string.Empty);

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList detailTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "750")));
                    if (detailTables == null || detailTables.Count == 0)
                    {
                        continue;
                    }

                    string detailTablesHtml = detailTables.ToHtml();
                    string infoCtx = detailTablesHtml.Replace("</tr>", "\r\n").ToCtxString().Replace("\r\n\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\t", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                    // Collect attachment links that appear inside the detail table.
                    parser = new Parser(new Lexer(detailTablesHtml));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null)
                    {
                        for (int k = 0; k < anchors.Count; k++)
                        {
                            ATag anchor = anchors[k] as ATag;
                            if (anchor == null || !anchor.IsAtagAttach())
                            {
                                continue;
                            }

                            string attachUrl = "http://www.szjsjy.com.cn/" + anchor.Link.Replace("../", "");
                            string attachName = anchor.LinkText.Replace("&nbsp", "").Replace(";", "");
                            BaseAttach attach = ToolDb.GenBaseAttach(attachName, info.Id, attachUrl);
                            base.AttachList.Add(attach);
                        }
                    }

                    info.CtxHtml = detailTables.AsHtml();
                    info.InfoCtx = infoCtx;
                    notices.Add(info);

                    if (!crawlAll && notices.Count >= this.MaxCount)
                    {
                        return notices;
                    }
                }
            }

            return notices;
        }
コード例 #2
0
        /// <summary>
        /// Crawls the paged article list at <c>SiteUrl</c>, builds a <c>NotifyInfo</c> for every
        /// row whose detail page contains <c>div id="contentDiv"</c>, and persists it through
        /// <c>ToolDb.SaveEntity</c>. When that call returns <c>true</c>, the images and attachment
        /// links found in the article content are stored as well.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops after <c>MaxCount</c> articles have been processed.
        /// The article that reaches the limit is persisted before the method returns; earlier
        /// versions returned before saving it, so one article fewer than <c>MaxCount</c> was stored.
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted directly instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.gzzb.gd.cn";
            int pageCount = 1;
            int processed = 0;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch (Exception)
            {
                // The first page could not be fetched, so there is nothing to crawl.
                return null;
            }

            // Read the page count from the "page=" parameter of the third-from-last pager link.
            Parser parser = new Parser(new Lexer(html));
            NodeList pagerLinks = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("height", "25")), true), new TagNameFilter("a")));
            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                try
                {
                    // "kdxx" is used as an artificial terminator so the value after "page=" can be
                    // cut out whether or not another query parameter follows it.
                    string pagerHref = pagerLinks.GetATag(pagerLinks.Count - 3).Link.Replace("&", "kdxx") + "kdxx";
                    pagerHref = pagerHref.GetRegexBegEnd("page=", "kdxx").Replace("&amp;", "");
                    pageCount = Convert.ToInt32(pagerHref);
                }
                catch
                {
                    // Any parsing problem falls back to a single page.
                    pageCount = 1;
                }
            }

            for (int page = 1; page <= pageCount; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + page.ToString(), Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Skip a page that cannot be fetched and try the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList tables = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zcfg_right_table")), true), new TagNameFilter("table")));
                if (tables == null || tables.Count == 0)
                {
                    continue;
                }

                TableTag table = tables[0] as TableTag;
                if (table == null)
                {
                    // The matched node is not a table tag; previously this caused a
                    // NullReferenceException that aborted the whole crawl.
                    continue;
                }

                // Row 0 is not treated as an article row.
                for (int row = 1; row < table.RowCount; row++)
                {
                    TableRow tr = table.Rows[row];
                    if (tr == null || tr.ColumnCount < 2)
                    {
                        // Columns[1] is read below; a shorter row would throw and abort the crawl.
                        continue;
                    }

                    string headName = tr.Columns[1].ToNodePlainString();
                    string infoUrl = siteRoot + tr.Columns[1].GetATagHref();

                    string detailHtml;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Skip an article whose detail page cannot be fetched.
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList content = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "contentDiv")));
                    if (content == null || content.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = content.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString().Replace("○", "〇").Replace("O", "〇");

                    // Try the date extractors in turn until one of them yields a value.
                    string releaseTime = infoCtx.GetChinaTime();
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetDateRegex("yyyy年MM月dd日");
                    }
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetDateRegex();
                    }

                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, string.Empty, MsgTypeCosnt.GuangZhouMsgType, infoUrl, ctxHtml, "广东省", "广州市区", string.Empty, infoCtx, "政策法规");

                    if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        // Images embedded in the article are stored under the article title.
                        parser = new Parser(new Lexer(ctxHtml));
                        NodeList images = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                        if (images != null)
                        {
                            for (int i = 0; i < images.Count; i++)
                            {
                                ImageTag image = images[i] as ImageTag;
                                if (image == null)
                                {
                                    continue;
                                }
                                try
                                {
                                    this.SaveAttachment(info, image.GetAttribute("src"), headName, siteRoot);
                                }
                                catch
                                {
                                    // Best effort: one failing image must not stop the crawl.
                                }
                            }
                        }

                        // Links that qualify as attachments are stored under their link text.
                        parser = new Parser(new Lexer(ctxHtml));
                        NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (anchors != null)
                        {
                            for (int i = 0; i < anchors.Count; i++)
                            {
                                ATag anchor = anchors[i] as ATag;
                                if (anchor == null || !anchor.IsAtagAttach())
                                {
                                    continue;
                                }
                                try
                                {
                                    this.SaveAttachment(info, anchor.Link, anchor.LinkText, siteRoot);
                                }
                                catch
                                {
                                    // Best effort: one failing attachment must not stop the crawl.
                                }
                            }
                        }
                    }

                    // Count the article only after it has been handled, so the article that
                    // reaches MaxCount is persisted instead of being dropped.
                    processed++;
                    if (!crawlAll && processed >= this.MaxCount)
                    {
                        return null;
                    }
                }
            }

            return null;
        }

        /// <summary>
        /// Resolves <paramref name="link"/> against <paramref name="siteRoot"/>, builds a
        /// <c>BaseAttach</c> for it through <c>ToolHtml.GetBaseAttach</c> and stores the result
        /// when one is produced. Exceptions from those calls are not caught here; callers that
        /// want best-effort behaviour wrap the call themselves.
        /// </summary>
        /// <param name="info">The article the attachment belongs to; its <c>Id</c> is passed on.</param>
        /// <param name="link">Absolute or site-relative URL; null or blank values are ignored.</param>
        /// <param name="title">Title passed on to <c>ToolHtml.GetBaseAttach</c>.</param>
        /// <param name="siteRoot">Prefix placed in front of links that do not start with "http".</param>
        private void SaveAttachment(NotifyInfo info, string link, string title, string siteRoot)
        {
            if (link == null)
            {
                return;
            }

            string trimmed = link.Trim();
            if (trimmed.Length == 0)
            {
                return;
            }

            // A prefix test is used instead of Contains("http"), which also matched relative
            // links that merely carry "http" somewhere in their path or query string.
            string url = trimmed.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                ? trimmed
                : siteRoot + trimmed;

            BaseAttach attach = ToolHtml.GetBaseAttach(url, title, info.Id);
            if (attach != null)
            {
                ToolDb.SaveEntity(attach, string.Empty);
            }
        }
コード例 #3
0
ファイル: ProjectResultSzBaoAn.cs プロジェクト: SHNXJMG/Small
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List <ProjectResult>();
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }
                object[] array = (object[])obj.Value;
                foreach (object arrValue in array)
                {
                    string Code = string.Empty, prjName = string.Empty, BuildUnit = string.Empty,
                                FinalistsWay = string.Empty, RevStaMethod = string.Empty, SetStaMethod = string.Empty,
                                VoteMethod = string.Empty, RevStaDate = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty,
                                Ctx = string.Empty, Html = string.Empty, beginDate = string.Empty;

                    Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                    Code      = Convert.ToString(dic["bdBH"]);
                    prjName   = Convert.ToString(dic["bdName"]);
                    beginDate = Convert.ToString(dic["createTime2"]);
                    string dbjieGuoid = Convert.ToString(dic["dbJieGuoGuid"]);
                    string bdId       = Convert.ToString(dic["bdGuid"]);
                    string ggId       = Convert.ToString(dic["ggGuid"]);
                    string detailUrl  = Convert.ToString(dic["detailUrl"]);



                    InfoUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=9&id=" + Code;

                    string attachJson = string.Empty;
                    try
                    {
                        Html = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        string temp = Html.GetReplace("\"\"");
                        if (string.IsNullOrWhiteSpace(temp))
                        {
                            InfoUrl = " https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/dbResult_View.do?bdGuid=" + bdId + "&ggGuid=" + ggId + "&dbJieGuoGuid=" + dbjieGuoid;
                            Html    = this.ToolWebSite.GetHtmlByUrl(InfoUrl);
                            string url = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryDbJieGuoByGuid.do?guid=" + dbjieGuoid;
                            attachJson = this.ToolWebSite.GetHtmlByUrl(url);
                        }
                    }
                    catch { continue; }


                    string gcName = string.Empty, bdName = string.Empty,
                           zbrName = string.Empty, createTime = string.Empty,
                           lxr = string.Empty, lxdh = string.Empty, dbBanFa = string.Empty, piaoJueBanFa = string.Empty;
                    bool   isChouQian = false;
                    string attachId   = string.Empty;
                    string rwFs       = string.Empty;
                    string unitUrl    = string.Empty;
                    string lxrxx      = string.Empty;
                    string lxdhxx     = string.Empty;
                    if (!string.IsNullOrWhiteSpace(attachJson))
                    {
                        JavaScriptSerializer        newSerializer = new JavaScriptSerializer();
                        Dictionary <string, object> newTypeJson   = (Dictionary <string, object>)newSerializer.DeserializeObject(attachJson);
                        Dictionary <string, object> kdInfo        = (Dictionary <string, object>)newTypeJson;
                        Dictionary <string, object> ggbd          = (Dictionary <string, object>)kdInfo["ggbd"];
                        Dictionary <string, object> gc            = (Dictionary <string, object>)ggbd["gc"];
                        Dictionary <string, object> bd            = (Dictionary <string, object>)kdInfo["bd"];
                        Dictionary <string, object> bdgc          = (Dictionary <string, object>)bd["gc"];
                        try
                        {
                            attachId = Convert.ToString(kdInfo["attachFileGroupGuid"]);
                        }
                        catch { }
                        try
                        {
                            string ggGuid = Convert.ToString(kdInfo["ggGuid"]);
                            string bdGuid = Convert.ToString(kdInfo["bdGuid"]);
                            unitUrl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryTbrListByBdGuidAndGgGuidForGs.do?bdGuid=" + bdGuid + "&ggGuid=" + ggGuid;
                        }
                        catch { }
                        gcName = Convert.ToString(gc["gcName"]);
                        try
                        {
                            bdName = Convert.ToString(kdInfo["bdName"]);
                        }
                        catch { bdName = gcName; }
                        zbrName    = Convert.ToString(gc["zbRName"]);
                        createTime = Convert.ToString(kdInfo["dbTime"]);
                        createTime = ToolHtml.GetDateTimeByLong(Convert.ToInt64(createTime)).ToString();
                        try
                        {
                            lxr = Convert.ToString(bdgc["lianXiRenName"]);
                        }
                        catch { }
                        try
                        {
                            lxrxx = Convert.ToString(bdgc["jingBanRenName"]);
                        }
                        catch { }
                        try
                        {
                            lxdh = Convert.ToString(bdgc["lianXiRenPhone"]);
                        }
                        catch { }
                        try
                        {
                            lxdhxx = Convert.ToString(bdgc["jingBanRenMobile"]);
                        }
                        catch { }
                        try
                        {
                            rwFs = Convert.ToString(kdInfo["rwFangShi"]);
                        }
                        catch { }
                        try
                        {
                            dbBanFa = Convert.ToString(kdInfo["dbBanFa"]);
                        }
                        catch { }
                        try
                        {
                            piaoJueBanFa = Convert.ToString(kdInfo["piaoJueBanFa"]);
                        }
                        catch { }
                        try
                        {
                            isChouQian = (bool)kdInfo["isChouQian"];
                        }
                        catch { }
                        string surl = " https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/dbResult_View.do?bdGuid=" + bdId + "&ggGuid=" + ggId + "&dbJieGuoGuid=" + dbjieGuoid;
                        attachJson = this.ToolWebSite.GetHtmlByUrl(surl);
                        Html       = attachJson;
                        Parser   parserNew = new Parser(new Lexer(Html));
                        NodeList tableNode = parserNew.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "de_tab1")));
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            Html = tableNode.AsHtml();
                            Html = Html.GetReplace("<td  id=\"ggName\">&nbsp;</td>", "<td  id=\"ggName\">&nbsp;" + prjName + "</td>");
                            Html = Html.GetReplace("<td id=\"bdBH\">&nbsp;</td>", "<td id=\"bdBH\">&nbsp;" + Code + "</td>");
                            Html = Html.GetReplace("<td id=\"bdName\">&nbsp;</td>", "<td id=\"bdName\">&nbsp;" + bdName + "</td>");
                            Html = Html.GetReplace("<td id=\"zbRName\">&nbsp;</td>", "<td id=\"zbRName\">&nbsp;" + zbrName + "</td>");
                            Html = Html.GetReplace("<td id=\"dbTime\">&nbsp;</td>", "<td id=\"dbTime\">&nbsp;" + createTime + "</td>");
                            Html = Html.GetReplace("<td id=\"rwfs\">&nbsp;</td>", "<td id=\"rwfs\">&nbsp;" + rwFs + "</td>");
                            Html = Html.GetReplace("<td id=\"dbBanFa\">&nbsp;</td>", "<td id=\"dbBanFa\">&nbsp;" + dbBanFa + "</td>");
                            Html = Html.GetReplace("<td id=\"lianXiRenName\">&nbsp;</td>", "<td id=\"lianXiRenName\">&nbsp;" + lxrxx + "</td>");
                            Html = Html.GetReplace("<td id=\"lianXiRenPhone\">&nbsp;</td>", "<td id=\"lianXiRenPhone\">&nbsp;" + lxdhxx + "</td>");
                            Ctx  = Html.Replace("</tr>", "\r\n").ToCtxString();
                        }
                    }

                    string   resultCtx = string.Empty;
                    Parser   parser    = new Parser(new Lexer(Html.GetReplace("\\\"", "\"").GetReplace("0:00:00", "")));
                    NodeList listNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "de_tab1")));
                    if (listNode != null && listNode.Count > 0)
                    {
                        TableTag table = listNode[0] as TableTag;
                        for (int r = 0; r < table.RowCount; r++)
                        {
                            for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                            {
                                string temp = table.Rows[r].Columns[c].ToPlainTextString().GetReplace(":,:");
                                if (c % 2 == 0)
                                {
                                    resultCtx += temp + ":";
                                }
                                else
                                {
                                    resultCtx += temp + "\r\n";
                                }
                            }
                        }
                    }

                    string strTmp = string.Empty;
                    if (!string.IsNullOrEmpty(unitUrl))
                    {
                        string unithtml = string.Empty;
                        try
                        {
                            unithtml = this.ToolWebSite.GetHtmlByUrl(unitUrl);
                        }
                        catch { }
                        object[] unitTypeJson = (object[])serializer.DeserializeObject(unithtml);
                        if (unitTypeJson.Length > 0)
                        {
                            List <LongGangResult> unitLists = this.GetUnits(unitTypeJson);
                            if (isChouQian)
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>序号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标人名称</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标时间</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>中标候选人</th>";
                                strTmp += "</tr>";
                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Xh))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Xh + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.TbDate + "</td>";
                                    if (unitInfo.BidStatus == "3")
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox'  checked=true disabled=true/></td>";
                                    }
                                    else
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox' disabled=true/></td>";
                                    }
                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                            else if (dbBanFa == "其他方法")
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>序号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>企业名称</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>确定中标候选人</th>";
                                strTmp += "</tr>";

                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Xh))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Xh + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    if (unitInfo.BidStatus == "3")
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox'  checked=true disabled=true/></td>";
                                    }
                                    else
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox' disabled=true/></td>";
                                    }
                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                            else if (dbBanFa == "逐轮淘汰")
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>序号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标人名称</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标报价(元)</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标时间</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>是否入围</th>";
                                strTmp += "</tr>";
                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Xh))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Xh + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.BidMoney + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.TbDate + "</td>";
                                    if (unitInfo.IsNo == "是")
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox'  checked=true disabled=true/></td>";
                                    }
                                    else
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox' disabled=true/></td>";
                                    }

                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                            else if (dbBanFa == "集体议事法")
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>序号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>企业名称</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>确定中标候选人</th>";
                                strTmp += "</tr>";

                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Code))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Code + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    if (unitInfo.IsNo == "是")
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox'  checked=true disabled=true/></td>";
                                    }
                                    else
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox' disabled=true/></td>";
                                    }
                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                            else if (dbBanFa == "价格竞争法")
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>序号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>企业名称</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>确定中标候选人</th>";
                                strTmp += "</tr>";

                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Xh))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Xh + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    if (unitInfo.BidStatus == "3")
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox'  checked=true disabled=true/></td>";
                                    }
                                    else
                                    {
                                        strTmp = strTmp + "<td><input type='checkbox' disabled=true/></td>";
                                    }
                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                            else if (piaoJueBanFa == "简单多数法")
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>编号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标单位</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>得票数</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>排名</th>";
                                strTmp += "</tr>";
                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Code))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Code + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Piao + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Xh + "</td>";
                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                            else if (piaoJueBanFa == "一对一比较法")
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>编号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标单位</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>取胜次数</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>排名</th>";
                                strTmp += "</tr>";
                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Code))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Code + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Piao + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Xh + "</td>";
                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                            else
                            {
                                strTmp += "<table width='100%' border='0' class='de_tab2'>";
                                strTmp += "<tr>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>编号</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>投标单位</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>取胜次数</th>";
                                strTmp += "<th style='text-align: left' class='bg_tdtop'>排名</th>";
                                strTmp += "</tr>";
                                foreach (LongGangResult unitInfo in unitLists.OrderBy(x => x.Code))
                                {
                                    strTmp = strTmp + "<tr>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Code + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.UnitName + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Piao + "</td>";
                                    strTmp = strTmp + "<td style='padding: 0px'>" + unitInfo.Xh + "</td>";
                                    strTmp = strTmp + "</tr>";
                                }
                                strTmp = strTmp + "</table>";
                            }
                        }
                    }

                    Ctx       = Html.GetReplace("</tr> ", "\r\n").ToCtxString();
                    BuildUnit = resultCtx.GetRegex("建设单位").GetReplace("&nbsp", "");
                    if (string.IsNullOrEmpty(BuildUnit))
                    {
                        BuildUnit = zbrName;
                    }
                    FinalistsWay = resultCtx.GetRegex("入围方式").GetReplace("&nbsp", "");
                    RevStaMethod = resultCtx.GetRegex("评标方法");
                    SetStaMethod = resultCtx.GetRegex("定标方法").GetReplace("&nbsp", "");
                    VoteMethod   = resultCtx.GetRegex("票决方法");
                    RevStaDate   = resultCtx.GetRegex("定标时间").GetDateRegex();
                    if (string.IsNullOrEmpty(RevStaDate))
                    {
                        RevStaDate = createTime;
                    }

                    if (!string.IsNullOrWhiteSpace(strTmp))
                    {
                        Html += strTmp;
                        Ctx   = Html.GetReplace("</tr> ", "\r\n").ToCtxString();
                    }

                    MsgType = "深圳市建设工程交易中心宝安分中心";

                    ProjectResult info = ToolDb.GetProjectResult("广东省", "深圳宝安区工程", "宝安区", Code, prjName, BuildUnit, FinalistsWay, RevStaMethod, SetStaMethod,
                                                                 VoteMethod, RevStaDate, detailUrl, MsgType, Ctx, Html, beginDate);
                    sqlCount++;

                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        if (!string.IsNullOrWhiteSpace(attachId))
                        {
                            string url = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + attachId;

                            string attachHtml = string.Empty;
                            try
                            {
                                attachHtml = this.ToolWebSite.GetHtmlByUrl(url);
                            }
                            catch { }
                            if (!string.IsNullOrWhiteSpace(attachHtml))
                            {
                                JavaScriptSerializer        newSerializers = new JavaScriptSerializer();
                                Dictionary <string, object> newTypeJsons   = (Dictionary <string, object>)newSerializers.DeserializeObject(attachHtml);
                                Dictionary <string, object> mofo           = (Dictionary <string, object>)newTypeJsons;
                                object[] objs = (object[])mofo["rows"];
                                foreach (object objAttach in objs)
                                {
                                    Dictionary <string, object> attachs = (Dictionary <string, object>)objAttach;
                                    string     attachguid = Convert.ToString(attachs["attachGuid"]);
                                    string     attachName = Convert.ToString(attachs["attachName"]);
                                    string     link       = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachguid;
                                    BaseAttach attach     = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                                    if (attach != null)
                                    {
                                        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                    }
                                }
                            }
                        }
                        else
                        {
                            parser = new Parser(new Lexer(Html));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int f = 0; f < fileNode.Count; f++)
                                {
                                    ATag tag = fileNode[f] as ATag;
                                    if (tag.IsAtagAttach() || tag.Link.ToLower().Contains("downloadfile"))
                                    {
                                        try
                                        {
                                            BaseAttach attach = null;
                                            string     link   = string.Empty;
                                            if (tag.Link.ToLower().Contains("http"))
                                            {
                                                link = tag.Link;
                                                if (link.StartsWith("\\"))
                                                {
                                                    link = link.Substring(link.IndexOf("\\"), link.Length - link.IndexOf("\\"));
                                                }
                                                if (link.EndsWith("//"))
                                                {
                                                    link = link.Remove(link.LastIndexOf("//"));
                                                }
                                                link = link.GetReplace("\\", "");
                                            }
                                            else
                                            {
                                                link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                                            }
                                            attach = ToolHtml.GetBaseAttach(link, tag.LinkText, info.Id, "SiteManage\\Files\\Attach\\");

                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                            }
                                        }
                                        catch { continue; }
                                    }
                                }
                            }
                        }
                    }
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                }
            }
            return(list);
        }
コード例 #4
0
ファイル: NoticeHuBeiQtxm.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Walks the paginated notice list at <c>SiteUrl</c>, downloads the detail page behind every
        /// grid row and creates a <c>NoticeInfo</c> for each detail page that contains a
        /// <c>td id="TDContent"</c> cell. Attachment anchors found in that cell are queued in
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the list holds at least <c>MaxCount</c> notices.
        /// </param>
        /// <returns>
        /// The notices gathered so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  notices   = new List <NoticeInfo>();
            int    pageCount = 1;
            string listHtml  = string.Empty;
            // Handed by ref to every list-page request (presumably to carry the session cookie — confirm).
            string cookies = string.Empty;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookies);
            }
            catch
            {
                // No first page, nothing to crawl.
                return notices;
            }

            // The page total is parsed from the pager cell using the markers "总页数" / "当前页";
            // if that fails the single-page default stays in place.
            Parser   parser    = new Parser(new Lexer(listHtml));
            NodeList pagerCell = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
            if (pagerCell != null && pagerCell.Count > 0)
            {
                try
                {
                    pageCount = int.Parse(pagerCell.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", ""));
                }
                catch { }
            }

            for (int page = 1; page <= pageCount; page++)
            {
                if (page > 1)
                {
                    // Later pages are requested by posting the pager event together with the
                    // view state read from the most recently downloaded list page.
                    string viewState = this.ToolWebSite.GetAspNetViewState(listHtml);
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" },
                        new string[] { viewState, "MoreInfoList1$Pager", page.ToString() });
                    try
                    {
                        listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.Default, ref cookies);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(listHtml));
                NodeList gridNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (gridNodes == null || gridNodes.Count <= 0)
                {
                    continue;
                }

                TableTag grid = gridNodes[0] as TableTag;
                for (int row = 0; row < grid.RowCount; row++)
                {
                    string   infoType    = "澄清修改通知";
                    TableRow tr          = grid.Rows[row];
                    ATag     titleLink   = tr.Columns[1].GetATag();
                    string   title       = titleLink.GetAttribute("title");
                    string   publishTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string   detailUrl   = "http://www.hbggzy.cn" + titleLink.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this row only.
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList contentCell = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (contentCell == null || contentCell.Count <= 0)
                    {
                        continue;
                    }

                    string contentHtml = contentCell.AsHtml();
                    string contentText = contentHtml.GetReplace("</p>,<br />,<br/>", "\r\n").ToCtxString();
                    // Project code and build unit are not extracted by this crawler; both are passed as empty strings.
                    NoticeInfo info = ToolDb.GenNoticeInfo("湖北省", "湖北省及地市", "", string.Empty, title, infoType, contentText, publishTime, string.Empty, "湖北省公共资源交易中心", detailUrl, string.Empty, string.Empty, string.Empty, string.Empty, "其他项目", string.Empty, contentHtml);
                    notices.Add(info);

                    // Queue every anchor of the content cell that IsAtagAttach() accepts.
                    parser = new Parser(new Lexer(contentHtml));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int k = 0; k < anchors.Count; k++)
                        {
                            ATag anchor = anchors[k] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links without "http" in them get the site root prepended.
                            string href = anchor.Link.ToLower().Contains("http")
                                ? anchor.Link
                                : "http://www.hbggzy.cn/" + anchor.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, href);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && notices.Count >= this.MaxCount)
                    {
                        return notices;
                    }
                }
            }

            return notices;
        }
コード例 #5
0
        /// <summary>
        /// Crawls the bid-result list that starts at <c>SiteUrl</c>, opens the detail page linked from
        /// every list row and creates a <c>BidInfo</c> for each detail page that contains a content
        /// table. The winning bidder and bid amount are read from the award table inside
        /// <c>td id="pContent"</c>, with regex helpers over the page text as fallback.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the list holds at least <c>MaxCount</c> entries.
        /// </param>
        /// <returns>
        /// The entries gathered so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            // Get the page count: the pager text is matched as "1/<total>页".
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("bgColor", "#EEF4F9")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                string pageTemp = tdNodes.AsString().Replace("&nbsp;", "").Replace(" ", "").Trim();
                Regex  regpage  = new Regex(@"1/[0-9]+页");
                try
                {
                    pageInt = int.Parse(regpage.Match(pageTemp).Value.Split('/')[1].Replace("页", "").Trim());
                }
                catch
                {
                    // Pager text not recognised: keep the single-page default.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.szzdzb.cn/Product-index-id-11-p-" + i + ".html", Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "hangao27"))), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count <= 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                // Row 0 is skipped; data rows start at index 1.
                for (int j = 1; j < table.RowCount; j++)
                {
                    // Fields this crawler never fills; they are passed to GenBidInfo as empty strings.
                    string buildUnit = string.Empty, endDate = string.Empty, prjMgr = string.Empty, otherType = string.Empty;
                    string bidUnit = string.Empty, bidMoney = string.Empty;

                    TableRow tr      = table.Rows[j];
                    string   code    = tr.Columns[0].ToPlainTextString().Trim();
                    string   prjName = tr.Columns[1].ToPlainTextString().Trim();
                    ATag     aTag    = tr.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                    string   InfoUrl = "http://www.szzdzb.cn" + aTag.Link;

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "").GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this row only.
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "hangao27"))), new TagNameFilter("table")));
                    if (dtnode == null || dtnode.Count <= 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtnode.AsHtml();
                    string bidCtx    = HtmlTxt.ToLower().GetReplace("</p>,<br/>", "\r\n").ToCtxString();
                    string beginDate = bidCtx.GetRegex("发布时间").GetDateRegex();

                    // Both page layouts are read from the first table directly inside td#pContent.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList nodeTab = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "pContent"))), new TagNameFilter("table")));
                    TableTag tabNode = (nodeTab != null && nodeTab.Count > 0) ? nodeTab[0] as TableTag : null;

                    // NOTE(review): every `.Replace(",", "").Replace(",", "")` pair below is kept as found; the
                    // second call is a no-op as written — possibly a full-width comma was intended. Confirm.
                    if (bidCtx.Contains("确定中标供应商"))
                    {
                        // Layout 1: the bidder is taken from the row after the marker row and the amount
                        // from the row after that (second column each).
                        if (tabNode != null)
                        {
                            for (int r = 0; r < tabNode.RowCount; r++)
                            {
                                try
                                {
                                    if (tabNode.Rows[r].ToNodePlainString().Contains("确定中标供应商"))
                                    {
                                        bidUnit  = tabNode.Rows[r + 1].Columns[1].ToNodePlainString();
                                        bidMoney = tabNode.Rows[r + 2].Columns[1].ToNodePlainString().Replace(",", "").Replace(",", "").GetMoney("万元");
                                        break;
                                    }
                                }
                                catch
                                {
                                    // Row does not have the expected shape: try the next one.
                                }
                            }
                        }
                        if (string.IsNullOrWhiteSpace(bidUnit))
                        {
                            bidUnit = bidCtx.GetBidRegex();
                        }
                        if (bidMoney == "0" || string.IsNullOrWhiteSpace(bidMoney))
                        {
                            bidMoney = bidCtx.Replace(",", "").Replace(",", "").GetMoneyRegex();
                        }
                    }
                    else
                    {
                        // Layout 2: find the bidder via the "第一备选供应商" label, then read the amount from
                        // the third column of the table row that mentions that bidder.
                        bidUnit = bidCtx.GetBidRegex(new string[] { "第一备选供应商" });

                        // string.Contains("") is true for every row, so an empty bidder name would let an
                        // unrelated row supply the amount. Scan only when a name was found.
                        // NOTE(review): bidUnit comes from lower-cased text while the rows are not lower-cased,
                        // so names containing upper-case Latin letters may never match — confirm.
                        if (tabNode != null && !string.IsNullOrWhiteSpace(bidUnit))
                        {
                            for (int r = 0; r < tabNode.RowCount; r++)
                            {
                                try
                                {
                                    if (tabNode.Rows[r].ToNodePlainString().Contains(bidUnit))
                                    {
                                        bidMoney = tabNode.Rows[r].Columns[2].ToNodePlainString().Replace(",", "").Replace(",", "").GetMoney();
                                        break;
                                    }
                                }
                                catch
                                {
                                    // Row does not have the expected shape: try the next one.
                                }
                            }
                        }
                    }

                    string specType = "其他";
                    string msgType  = "深圳市振东招标代理有限公司";
                    string bidType  = ToolHtml.GetInviteTypes(prjName);
                    prjName = ToolDb.GetPrjName(prjName);
                    // beginDate is passed twice, exactly as the original call did.
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }
コード例 #6
0
        /// <summary>
        /// Crawls the paginated prequalification-notice list at <c>SiteUrl</c>, downloads the detail
        /// page of every list row and adds one <c>NoticeInfo</c> per row. Text and HTML come from the
        /// detail page's <c>td valign="top"</c> cells when present; otherwise both stay empty.
        /// Attachment anchors found in those cells are queued in <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the list holds at least <c>MaxCount</c> entries.
        /// </param>
        /// <returns>
        /// The notices gathered so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            // The page count is read from the onclick="goPage(N)" attribute of the last pager span.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tzgg_right_page")), true), new TagNameFilter("span")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    Span   temp = pageList[pageList.Count - 1] as Span;
                    string tem  = temp.GetAttribute("onclick");
                    pageInt = Convert.ToInt32(tem.Replace("goPage(", "").Replace(")", ""));
                }
                catch
                {
                    // Pager not recognised: fall back to a single page.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Later pages are requested by posting the page number with the search form fields.
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "page", "xmlb", "xmjdbmid", "method", "SearchBar", "PageSize"
                        }, new string[] {
                            i.ToString(), "", "", "", "Y", "15"
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "table1")));
                if (nodeList == null || nodeList.Count <= 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                // Row 0 is skipped; data rows start at index 1.
                for (int j = 1; j < table.RowCount; j++)
                {
                    string InfoCtx = string.Empty, htmlTxt = string.Empty;

                    TableRow tr          = table.Rows[j];
                    string   InfoTitle   = tr.Columns[1].ToNodePlainString();
                    string   PublistTime = tr.Columns[2].ToPlainTextString();
                    string   InfoType    = "资审公示";
                    string   InfoUrl     = "http://www.gzzb.gd.cn" + tr.Columns[1].GetATagHref();

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                        htldtl = htldtl.GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this row only.
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")));
                    if (dtlList != null && dtlList.Count > 0)
                    {
                        htmlTxt = dtlList.ToHtml();
                        // Collapse runs of blank lines; each pass halves a run of consecutive "\r\n".
                        InfoCtx = dtlList.AsString().ToCtxString().Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                    }

                    // The notice is recorded even when no content cell was found (empty text and HTML).
                    NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "广州市区", string.Empty, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, MsgTypeCosnt.GuangZhouMsgType, InfoUrl, string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, htmlTxt);
                    list.Add(info);

                    // dtlList is null-checked above, so it must be guarded here as well: dereferencing
                    // it unconditionally would throw and abort the whole crawl.
                    if (dtlList != null)
                    {
                        parser = new Parser(new Lexer(dtlList.AsHtml()));
                        NodeList aList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aList != null && aList.Count > 0)
                        {
                            for (int c = 0; c < aList.Count; c++)
                            {
                                ATag aTag = aList[c].GetATag();
                                if (aTag.IsAtagAttach())
                                {
                                    string alink = "http://www.gzzb.gd.cn" + aTag.Link;
                                    // NOTE(review): the second Replace(";", "") is a no-op as written — possibly a
                                    // full-width semicolon was intended. Kept unchanged; confirm.
                                    BaseAttach attach = ToolDb.GenBaseAttach(aTag.LinkText.Replace("&nbsp", "").Replace(";", "").Replace(";", ""), info.Id, alink);
                                    base.AttachList.Add(attach);
                                }
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #7
0
        /// <summary>
        /// Downloads the list page at <c>SiteUrl</c>, follows every list entry to its detail page and
        /// builds one <c>InviteInfo</c> record for each detail page that contains the content container.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List<InviteInfo>();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).Replace("&nbsp;", "");
            }
            catch
            {
                // Best effort: without the list page there is nothing to crawl.
                return(list);
            }

            // List entries are the <li> nodes matched underneath <div id="Body_div">.
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "Body_div")), true), new TagNameFilter("li")));

            if (sNode == null || sNode.Count <= 0)
            {
                return(list);
            }

            for (int t = 0; t < sNode.Count; t++)
            {
                // These fields are not available on this site and are stored empty.
                string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                INode node = sNode[t];
                ATag  aTag = node.GetATag();
                if (aTag == null)
                {
                    // An entry without a link has no detail page. Skipping it keeps the records
                    // gathered so far instead of aborting the crawl with a NullReferenceException.
                    continue;
                }

                string prjName   = aTag.GetAttribute("title");
                string beginDate = node.ToPlainTextString().GetDateRegex();
                string InfoUrl   = "http://www.tyjzsc.com.cn/" + aTag.Link.GetReplace("./");

                string htmldtl = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                }
                catch
                {
                    // A detail page that cannot be downloaded is skipped; the crawl continues.
                    continue;
                }

                parser = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("style", "width:650px;")));
                if (dtlNode != null && dtlNode.Count > 0)
                {
                    string HtmlTxt   = dtlNode.AsHtml();
                    string inviteCtx = HtmlTxt.ToCtxString();

                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string code       = inviteCtx.GetCodeRegex();
                    string msgType    = "太原市建设工程交易中心";
                    string specType   = "建设工程";
                    string inviteType = prjName.GetInviteBidType();

                    InviteInfo info = ToolDb.GenInviteInfo("山西省", "山西省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    // Attachments linked from the detail page are not collected by this crawler
                    // (the extraction code was already disabled before this revision).
                }

                // Evaluated for every processed entry, whether or not a record was added.
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return(list);
                }
            }
            return(list);
        }
コード例 #8
0
ファイル: NoticeSiChuangGgzy.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>. For each row of the
        /// <c>MoreInfoList1_DataGrid1</c> table it downloads the detail page, builds a
        /// <c>NoticeInfo</c> record and registers every attachment link found in the detail content
        /// in <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>NoticeInfo</c> records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<NoticeInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string viewState = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Best effort: without the first page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text carries the total page count between these two markers.
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Page count could not be read; fall back to crawling a single page.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the pager event back to the
                    // same URL, using the hidden form fields of the page fetched last.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    string __CSRFTOKEN      = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__CSRFTOKEN",
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    },
                                                                                      new string[] {
                        __CSRFTOKEN,
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    });
                    try
                    {
                        // presumably strips the listed cookie attributes before the cookie is re-sent — TODO confirm GetReplace semantics
                        cookiestr = cookiestr.GetReplace(new string[] { "path=/;", "HttpOnly", "," });
                        html      = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count <= 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; a shorter row cannot be a data row.
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // No link means no detail page; skip instead of aborting the whole crawl.
                        continue;
                    }

                    string InfoTitle   = aTag.GetAttribute("title");
                    string PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl     = "http://www.spprec.com" + aTag.Link;
                    string InfoType    = "变更公告";
                    string htmldtl     = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ivs_content")));
                    if (dtlNode == null || dtlNode.Count <= 0)
                    {
                        continue;
                    }

                    string htmlTxt = dtlNode.AsHtml();
                    string InfoCtx = htmlTxt.GetReplace("<br />,<br/>,<br>,</p>", "\r\n").ToCtxString();

                    NoticeInfo info = ToolDb.GenNoticeInfo("四川省", "四川省及地市", string.Empty, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "四川省公共资源交易中心", InfoUrl, string.Empty, string.Empty, string.Empty, string.Empty, "建设工程", string.Empty, htmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail content.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag tag = aNode[k] as ATag;
                            if (tag == null || !tag.IsAtagAttach())
                            {
                                continue;
                            }

                            // A link is absolute only when it *starts* with "http"; a relative
                            // path that merely contains "http" still needs the site root.
                            string link = tag.Link;
                            if (!link.TrimStart().StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
                            {
                                link = "http://www.spprec.com" + tag.Link;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #9
0
        /// <summary>
        /// Walks the paged list at <c>SiteUrl</c>, opens the detail page of every row in the
        /// <c>MoreInfoList1_DataGrid1</c> table and turns each detail page that has a
        /// <c>TDContent</c> cell into an <c>InviteInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>false</c> to stop once <c>MaxCount</c> records are in the result list.
        /// </param>
        /// <returns>
        /// The <c>InviteInfo</c> records gathered; empty if the first page could not be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  records    = new List<InviteInfo>();
            int    totalPages = 1;
            string pageHtml   = string.Empty;
            string cookies    = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookies);
            }
            catch
            {
                return(records);
            }

            Parser   parser = new Parser(new Lexer(pageHtml));
            NodeList pager  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));
            if (pager != null && pager.Count > 0)
            {
                try
                {
                    // Total page count sits between the two markers in the pager text.
                    string pagerText = pager[0].ToPlainTextString();
                    string countText = pagerText.GetRegexBegEnd("总页数", "当前").Replace(":", "");
                    totalPages = int.Parse(countText);
                }
                catch
                {
                    // Unreadable pager: stay with the single-page default.
                }
            }

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    // Later pages come from posting the pager event back with the current view state.
                    string   state       = this.ToolWebSite.GetAspNetViewState(pageHtml);
                    string[] fieldNames  = new string[] { "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" };
                    string[] fieldValues = new string[] { state, "MoreInfoList1$Pager", page.ToString() };
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                    try
                    {
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.UTF8, ref cookies);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(pageHtml));
                NodeList grids = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (grids == null || grids.Count <= 0)
                {
                    continue;
                }

                TableTag grid = grids[0] as TableTag;
                for (int row = 0; row < grid.RowCount; row++)
                {
                    TableRow line   = grid.Rows[row];
                    ATag     anchor = line.Columns[1].GetATag();

                    string prjName   = anchor.GetAttribute("title");
                    string beginDate = line.ToPlainTextString().GetDateRegex();
                    string InfoUrl   = "http://www.gaxqjyzx.com" + anchor.Link;

                    string detailHtml;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList content = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (content == null || content.Count <= 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = content.AsHtml();
                    string inviteCtx = HtmlTxt.ToCtxString();
                    string code      = inviteCtx.GetCodeRegex();
                    string buildUnit = inviteCtx.GetBuildRegex();

                    // Cut the build unit at the first agency marker, longer marker first.
                    foreach (string marker in new string[] { "招标代理机构", "代理机构" })
                    {
                        if (buildUnit.Contains(marker))
                        {
                            buildUnit = buildUnit.Remove(buildUnit.IndexOf(marker));
                        }
                    }

                    string prjAddress = inviteCtx.GetAddressRegex();
                    string inviteType = prjName.GetInviteBidType();

                    InviteInfo info = ToolDb.GenInviteInfo("贵州省", "贵州省及地市", "贵安新区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, string.Empty, inviteCtx, string.Empty, "贵安新区公共资源交易中心", inviteType, "建设工程", string.Empty, InfoUrl, HtmlTxt);
                    records.Add(info);

                    if (!crawlAll && records.Count >= this.MaxCount)
                    {
                        return(records);
                    }
                }
            }
            return(records);
        }
コード例 #10
0
ファイル: NotifyInfoShanXiJsgc.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the paged notification list at <c>SiteUrl</c>. For each row of the
        /// <c>MoreInfoList1_DataGrid1</c> table it downloads the detail page, builds a
        /// <c>NotifyInfo</c> record and saves it directly through <c>ToolDb.SaveEntity</c>; when the
        /// save succeeds, attachment links in the detail content are saved as well.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops once <c>MaxCount</c> records have been generated.
        /// </param>
        /// <returns>
        /// Always <c>null</c>: records are persisted inside this method rather than returned.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt = 1, sqlCount = 0;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text carries the total page count between these two markers.
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Page count could not be read; fall back to crawling a single page.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the pager event back to the
                    // same URL, using the hidden form fields of the page fetched last.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION",
                        "MoreInfoList1$txtTitle"
                    },
                        new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString(),
                        eventValidation,
                        ""
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count <= 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string infoScorce = string.Empty, area = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; a shorter row cannot be a data row.
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // No link means no detail page; skip instead of aborting the whole crawl.
                        continue;
                    }

                    string headName    = aTag.GetAttribute("title");
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();

                    // The area is the text of the first "[...]" group in the title cell. The
                    // closing bracket is searched *after* the opening one, so a stray "]" earlier
                    // in the text can no longer yield a negative Substring length.
                    string temp       = tr.Columns[1].ToNodePlainString();
                    int    openIndex  = temp.IndexOf('[');
                    int    closeIndex = openIndex >= 0 ? temp.IndexOf(']', openIndex + 1) : -1;
                    if (closeIndex > openIndex)
                    {
                        area = temp.Substring(openIndex, closeIndex - openIndex).GetReplace("[,]");
                    }

                    string infoUrl = "http://www.sxszbb.com" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "500")));
                    if (dtlNode == null || dtlNode.Count <= 0)
                    {
                        continue;
                    }

                    string ctxHtml  = dtlNode.AsHtml();
                    string infoCtx  = ctxHtml.ToCtxString();
                    string infoType = "通知公告";
                    string msgType  = "陕西省建设工程招标投标管理办公室";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "陕西省", "陕西省及地市", area, infoCtx, infoType);

                    // NOTE(review): the limit is checked before the save, so the record that
                    // reaches MaxCount is generated but never persisted — confirm this is intended.
                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    // Attachments are only processed for records whose save call returned true.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count <= 0)
                    {
                        continue;
                    }
                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag tag = aNode[a] as ATag;
                        if (tag == null || !tag.IsAtagAttach())
                        {
                            continue;
                        }

                        // A link is absolute only when it *starts* with "http"; a relative path
                        // that merely contains "http" still needs the site root.
                        string link = tag.Link;
                        if (!link.TrimStart().StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
                        {
                            link = "http://www.sxszbb.com" + tag.Link.GetReplace("../,./");
                        }
                        try
                        {
                            BaseAttach baseInfo = ToolHtml.GetBaseAttach(link, tag.LinkText, info.Id);
                            if (baseInfo != null)
                            {
                                ToolDb.SaveEntity(baseInfo, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: a failing attachment must not stop the crawl.
                        }
                    }
                }
            }
            return(null);
        }
コード例 #11
0
        /// <summary>
        /// Crawls the paged list at <c>SiteUrl</c>. For each row (except the last) of the list
        /// table it downloads the detail page, builds an <c>InviteInfo</c> record and registers
        /// every attachment link found in the detail content in <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List<InviteInfo>();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Best effort: without the first page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder2_lblSumPage")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString();
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Page count could not be read; fall back to crawling a single page.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Each later page is reached by posting the "next" link event of the page
                    // fetched last, so pages are visited strictly in sequence.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        "ctl00$ContentPlaceHolder2$lnkBtnNext",
                        "",
                        viewState,
                        "96852609", // NOTE(review): hard-coded generator value — confirm it still matches the site
                        eventValidation
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "695")));
                if (listNode == null || listNode.Count <= 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The last row of the table is not treated as a data row.
                for (int j = 0; j < table.RowCount - 1; j++)
                {
                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 2)
                    {
                        // Column 1 is read below; a shorter row cannot be a data row.
                        continue;
                    }
                    ATag aTag = tr.GetATag();
                    if (aTag == null)
                    {
                        // No link means no detail page; skip instead of aborting the whole crawl.
                        continue;
                    }

                    string prjName   = aTag.LinkText.Trim();
                    string beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = "http://js.panyu.gov.cn/" + aTag.Link;
                    string htmldtl   = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_txtContent")));
                    if (dtlNode == null || dtlNode.Count <= 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtlNode.AsHtml();
                    // NOTE(review): lower-casing the whole HTML also lower-cases the visible text
                    // that ends up in inviteCtx and code — presumably done only so the tag
                    // replacement matches regardless of case; confirm before changing.
                    string inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                    string code      = inviteCtx.GetCodeRegex().GetCodeDel();
                    string buildUnit = inviteCtx.GetBuildRegex();

                    // Trim trailing fragments that the build-unit match may have swallowed.
                    if (buildUnit.Contains("招标代理"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    if (buildUnit.Contains("公司"))
                    {
                        // Keep everything up to and including the first "公司".
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }

                    string prjAddress = inviteCtx.GetAddressRegex();
                    string inviteType = prjName.GetInviteBidType();
                    string msgType    = "广州市番禺区住房和建设局";
                    string specType   = "政府采购";
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "番禺区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail content.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNode != null && fileNode.Count > 0)
                    {
                        for (int k = 0; k < fileNode.Count; k++)
                        {
                            ATag fileAtag = fileNode[k].GetATag();
                            if (fileAtag == null || !fileAtag.IsAtagAttach())
                            {
                                continue;
                            }

                            string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                            string fileLink = fileAtag.Link;
                            // A link is absolute only when it *starts* with "http"; a relative
                            // path that merely contains "http" still needs the site root.
                            if (!fileLink.TrimStart().StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
                            {
                                fileLink = "http://js.panyu.gov.cn/" + fileAtag.Link;
                            }
                            base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #12
0
ファイル: BidJiangXiJiaoTong.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Walks the numbered listing pages under http://www.jxsggzy.cn/web/jyxx/002002/002002005/,
        /// downloads every linked detail page and converts it into a <c>BidInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the initial listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<BidInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "wb-page-li")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "\r");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the pager text cannot be parsed, only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                // NOTE(review): this overwrites SiteUrl with the page URL on every iteration,
                // exactly as before; confirm that other readers of SiteUrl expect that.
                SiteUrl = "http://www.jxsggzy.cn/web/jyxx/002002/002002005/" + i + ".html";
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
                }
                catch { continue; }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "ewb-list-node clearfix")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                for (int j = 0; j < listNode.Count; j++)
                {
                    string prjName = string.Empty,
                           buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty,
                           specType = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link cannot be followed; skip it instead of crashing.
                        continue;
                    }
                    prjName = aTag.GetAttribute("title");
                    if (string.IsNullOrWhiteSpace(prjName))
                    {
                        prjName = aTag.LinkText;
                    }
                    beginDate = listNode[j].ToPlainTextString().GetDateRegex();

                    // The third character is inspected, so the title must have at least three
                    // characters; shorter (or missing) titles simply leave the area empty.
                    if (!string.IsNullOrEmpty(prjName) && prjName.Length > 2)
                    {
                        char third = prjName[2];
                        if (third == '县' || third == '区' || third == '市')
                        {
                            area = prjName.Substring(0, 3);
                        }
                    }
                    InfoUrl = "http://www.jxsggzy.cn" + aTag.Link;
                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-info")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    bidCtx  = HtmlTxt.Replace("<br/>", "\r\n").ToCtxString();

                    // Build unit from the page text, with a labelled-span fallback. The branches
                    // below only replace this value when they find something better.
                    buildUnit = bidCtx.GetBuildRegex();
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = bidCtx.GetRegexBegEnd("招标人名称:", "项目");
                    }

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList dtlBidNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoNormalTable")));
                    TableTag bidTable = (dtlBidNode != null && dtlBidNode.Count > 0) ? dtlBidNode[0] as TableTag : null;
                    if (bidTable != null)
                    {
                        // Flatten the table into "label:value" lines: even cell positions are
                        // treated as labels, odd positions as values. Empty cells are skipped.
                        StringBuilder flattened = new StringBuilder();
                        for (int r = 0; r < bidTable.RowCount; r++)
                        {
                            for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                            {
                                string temp = bidTable.Rows[r].Columns[c].ToNodePlainString();
                                if (string.IsNullOrEmpty(temp))
                                {
                                    continue;
                                }
                                flattened.Append(temp).Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                            }
                        }
                        string ctx = flattened.ToString();

                        string tableBuildUnit = ctx.GetBuildRegex();
                        if (!string.IsNullOrEmpty(tableBuildUnit))
                        {
                            buildUnit = tableBuildUnit;
                        }
                        bidUnit = ctx.GetBidRegex(new string[] { "第一名" });
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = ctx.GetRegex("第一");
                        }
                        bidMoney = ctx.GetMoneyRegex();
                        prjMgr   = ctx.GetMgrRegex(new string[] { "建造师" });
                        code     = ctx.GetCodeRegex();
                    }
                    else
                    {
                        bidUnit = bidCtx.GetBidRegex();
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = bidCtx.GetRegex("第一中标候选人");
                        }
                        bidMoney = bidCtx.GetMoneyString().GetMoney("万元");
                        prjMgr   = bidCtx.GetMgrRegex();
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = bidCtx.GetRegex("注册监理工程师");
                        }
                        code = bidCtx.GetCodeRegex();
                    }

                    bidType  = "交通工程";
                    specType = "政府采购";
                    msgType  = "江西省公共资源交易中心";
                    BidInfo info = ToolDb.GenBidInfo("江西省", "江西省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #13
0
        /// <summary>
        /// Crawls the paginated listing at <c>SiteUrl</c>, follows each row's link on
        /// http://liaobu.dg.gov.cn/ and converts the detail page into an <c>InviteInfo</c> record.
        /// Attachment links found in the detail content are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "id1")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The page count is read from the text of the last option in the pager select.
                    SelectTag tag = sNode[0] as SelectTag;

                    string temp = tag.OptionTags[tag.OptionTags.Length - 1].StringText;
                    pageInt = int.Parse(temp.GetReplace("第,页"));
                }
                catch { pageInt = 1; }
            }

            // The pattern is constant, so it is built once instead of once per listing row.
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "4")));

                TableTag table = (viewList != null && viewList.Count > 0) ? viewList[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }
                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    // Column index 1 is read below, so rows with fewer than two columns are skipped.
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // No link in this row, so there is no detail page to follow.
                        continue;
                    }
                    beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();

                    prjName = aTag.GetAttribute("title");

                    InfoUrl = "http://liaobu.dg.gov.cn/" + aTag.Link;
                    string htmDtl;
                    try
                    {
                        htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                        htmDtl = regexHtml.Replace(htmDtl, "");
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "cont")));
                    if (dtl == null || dtl.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt    = dtl.AsHtml();
                    inviteCtx  = HtmlTxt.ToCtxString();
                    inviteType = prjName.GetInviteBidType();
                    prjAddress = inviteCtx.GetAddressRegex();
                    buildUnit  = inviteCtx.GetBuildRegex();
                    code       = inviteCtx.GetCodeRegex().GetCodeDel();

                    msgType  = "东莞市寮步镇政府";
                    specType = "政府采购";
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "寮步镇", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail content.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k].GetATag();
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // Links that already contain "http" are used as-is; others get the site prefix.
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://liaobu.dg.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #14
0
ファイル: BidHuiNanGgzy.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the paginated listing at <c>SiteUrl</c>, opens each entry's detail page on
        /// http://www.hngzzx.com and converts it into a <c>BidInfo</c> record. When the page text
        /// yields no winning bidder, the tables in the detail content are searched instead.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "page")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("共", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the pager text cannot be parsed, only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&Page=" + i);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_con_main_bulcon")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    string prjName = string.Empty,
                           buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty,
                           specType = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    prjName   = aTag.GetAttribute("title");
                    beginDate = node.ToNodePlainString().GetDateRegex();
                    string linkId = aTag.Link.GetRegexBegEnd("Id=", "&");
                    InfoUrl = "http://www.hngzzx.com/HomePage/ShowInfoDetail.aspx?Id=" + linkId + "&TableID=1";
                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail_con")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt   = dtlNode.AsHtml();
                    bidCtx    = HtmlTxt.GetReplace("</p>,<br/>", "\r\n").ToCtxString();
                    buildUnit = bidCtx.GetBuildRegex();
                    code      = bidCtx.GetCodeRegex().GetCodeDel();

                    bidUnit = bidCtx.GetBidRegex();
                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegex("第一名", false);
                    }
                    bidMoney = bidCtx.GetMoneyRegex();
                    prjMgr   = bidCtx.GetMgrRegex();

                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        // The rows are read through TableTag, so the nodes extracted here must be
                        // <table> elements; any other tag makes the cast below yield null.
                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            // Keyword groups are tried in priority order; the first table that
                            // matches a group ends the search, even if it produced no text.
                            string ctx;
                            if (!TryBuildTableContext(tableNode, new string[] { "供应商名称" }, out ctx)
                                && !TryBuildTableContext(tableNode, new string[] { "中标候选人" }, out ctx))
                            {
                                TryBuildTableContext(tableNode, new string[] { "中标单位", "中标候选单位", "投标人名称" }, out ctx);
                            }

                            bidUnit = ctx.GetBidRegex();
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = ctx.GetRegex("第一名,供应商名称,投标人名称");
                            }
                            string money = ctx.GetMoneyRegex();
                            // NOTE(review): as written this always leaves bidMoney equal to money;
                            // confirm whether the table value should only win when it is non-empty.
                            if (string.IsNullOrEmpty(money) || bidMoney != money)
                            {
                                bidMoney = money;
                            }
                            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = ctx.GetRegex("中标金额(单位:元),最终报价,投标报价(元)", false).GetMoney();
                            }
                            // NOTE(review): this repeats the page-text lookup made earlier;
                            // confirm whether the table text (ctx) was meant here.
                            if (string.IsNullOrEmpty(prjMgr))
                            {
                                prjMgr = bidCtx.GetMgrRegex();
                            }
                        }
                    }

                    // Cut the bidder name off right after the first organisation suffix found.
                    if (bidUnit.Contains("公司"))
                    {
                        bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                    }
                    if (bidUnit.Contains("研究院"))
                    {
                        bidUnit = bidUnit.Remove(bidUnit.IndexOf("研究院")) + "研究院";
                    }
                    if (bidUnit.Contains("开发局"))
                    {
                        bidUnit = bidUnit.Remove(bidUnit.IndexOf("开发局")) + "开发局";
                    }
                    // Values that still contain these label words are discarded.
                    if (bidUnit.Contains("名称") || bidUnit.Contains("联系人") || bidUnit.Contains("报价") || bidUnit.Contains("内容"))
                    {
                        bidUnit = string.Empty;
                    }
                    bidUnit = bidUnit.GetReplace("1,2,3,、");
                    if (code.Contains("代理"))
                    {
                        code = code.Remove(code.IndexOf("代理"));
                    }

                    // Amounts above 10000 are divided by 10000; unparseable text is left untouched.
                    decimal amount;
                    if (decimal.TryParse(bidMoney, out amount) && amount > 10000)
                    {
                        bidMoney = (amount / 10000).ToString();
                    }
                    specType = bidType = "政府采购";
                    msgType  = "湖南省公共资源交易中心";

                    BidInfo info = ToolDb.GenBidInfo("湖南省", "湖南省及地市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }

        /// <summary>
        /// Looks for the first node in <paramref name="tables"/> whose plain text contains any of
        /// <paramref name="keywords"/>. If that node is a table with more than two rows, its first
        /// two rows are flattened column by column into "header:value" lines.
        /// </summary>
        /// <param name="tables">Candidate nodes, searched in order.</param>
        /// <param name="keywords">Texts that mark a node as the one to read.</param>
        /// <param name="ctx">
        /// The flattened text; empty when nothing matched or the match had too few rows.
        /// </param>
        /// <returns>True when some node contained one of the keywords.</returns>
        private static bool TryBuildTableContext(NodeList tables, string[] keywords, out string ctx)
        {
            ctx = string.Empty;
            for (int t = 0; t < tables.Count; t++)
            {
                string text = tables[t].ToPlainTextString();
                bool   hit  = false;
                foreach (string keyword in keywords)
                {
                    if (text.Contains(keyword))
                    {
                        hit = true;
                        break;
                    }
                }
                if (!hit)
                {
                    continue;
                }

                TableTag tag = tables[t] as TableTag;
                if (tag != null && tag.RowCount > 2)
                {
                    StringBuilder flattened = new StringBuilder();
                    for (int c = 0; c < tag.Rows[0].ColumnCount; c++)
                    {
                        flattened.Append(tag.Rows[0].Columns[c].ToNodePlainString()).Append(":");
                        // The second row may have fewer cells than the header row.
                        if (c < tag.Rows[1].ColumnCount)
                        {
                            flattened.Append(tag.Rows[1].Columns[c].ToNodePlainString()).Append("\r\n");
                        }
                    }
                    ctx = flattened.ToString();
                }
                return true;
            }
            return false;
        }
コード例 #15
0
        /// <summary>
        /// Crawls the paginated listing that starts at <c>SiteUrl</c> (later pages come from
        /// http://www.szlhxq.gov.cn/mzbsc/zwgk69/cgzb/zbgg282/14843-N.html), opens each entry's
        /// detail page and converts it into an <c>InviteInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first listing page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "yesh fl")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode[0].ToNodePlainString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.szlhxq.gov.cn/mzbsc/zwgk69/cgzb/zbgg282/14843-" + i.ToString() + ".html", Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news1_list")), true), new TagNameFilter("li")));
                if (viewList == null || viewList.Count == 0)
                {
                    continue;
                }
                for (int j = 0; j < viewList.Count; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    beginDate = viewList[j].ToNodePlainString().GetDateRegex();
                    ATag aTag = viewList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link cannot be followed; skip it instead of crashing.
                        continue;
                    }
                    prjName = aTag.GetAttribute("title");
                    InfoUrl = "http://www.szlhxq.gov.cn" + aTag.Link;
                    string htmDtl;
                    try
                    {
                        htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tit-content")));
                    if (dtl == null || dtl.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtl.AsHtml();
                    // Strip script blocks, turn <br> variants into line breaks, remove the remaining
                    // tags and whitespace, then collapse repeated line breaks. The replacement
                    // sequence is kept exactly as it was because its result is order-dependent.
                    inviteCtx = System.Text.RegularExpressions.Regex.Replace(HtmlTxt, "(<script)[\\s\\S]*?(</script>)", "");
                    inviteCtx = System.Text.RegularExpressions.Regex.Replace(inviteCtx.Replace("<br/>", "\r\n").Replace("<BR/>", "\r\n").Replace("<BR>", "\r\n").Replace("<br>", "\r\n"), "<[^>]*>", "").Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\r\n", "\r\n").Replace("\r\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                    prjAddress = inviteCtx.GetAddressRegex();
                    if (prjAddress.Contains("**"))
                    {
                        prjAddress = string.Empty;
                    }
                    buildUnit = inviteCtx.GetBuildRegex();
                    if (buildUnit.Contains("资质"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("资质"));
                    }
                    code    = inviteCtx.GetCodeRegex().GetCodeDel();
                    msgType = "深圳市龙华新区民治街道办事处";
                    if (string.IsNullOrEmpty(prjAddress))
                    {
                        prjAddress = "见中标信息";
                    }
                    specType = "建设工程";
                    // The invite type is fixed for this source; the title-derived value that used
                    // to be computed first was always overwritten by this constant.
                    inviteType = "小型工程";
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = "深圳市龙华新区民治街道办事处";
                    }
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #16
0
        /// <summary>
        /// Crawls the paged tender list at <c>SiteUrl</c> and turns every list row into an
        /// <c>InviteInfo</c>, reading the announcement body from the row's detail page.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            // Determine the page count: walk the pager anchors from the last one backwards and
            // take the first whose link, after removing the fixed prefix, parses as an integer.
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "AspNetPager1")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                parser = new Parser(new Lexer(tdNodes.ToHtml()));
                NodeList pageList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (pageList != null && pageList.Count > 0)
                {
                    for (int i = pageList.Count - 1; i >= 0; i--)
                    {
                        try
                        {
                            ATag   pageLink = pageList.SearchFor(typeof(ATag), true)[i] as ATag;
                            string pageTemp = pageLink.Link.Replace("main.aspx?flg=3&id=6&page=", "");
                            pageInt = int.Parse(pageTemp);
                            break;
                        }
                        catch (Exception)
                        {
                            // Deliberately ignored: this anchor does not carry a numeric page
                            // index, so the previous anchor is tried next.
                        }
                    }
                }
            }

            // Built once; strips <script> blocks and <?xml .../> fragments from detail pages.
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&page=" + i.ToString()), Encoding.UTF8);
                    }
                    catch (Exception)
                    {
                        // A failed page download must not discard the records collected so far;
                        // skip this page and carry on with the next one.
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "760")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                    TableRow tr = table.Rows[j];

                    // Columns 1 (title link) and 2 (date) are read below; skip rows that lack them.
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    NodeList rowLinks = tr.Columns[1].SearchFor(typeof(ATag), true);
                    ATag     aTag     = (rowLinks != null && rowLinks.Count > 0) ? rowLinks[0] as ATag : null;
                    if (aTag == null)
                    {
                        continue;
                    }

                    beginDate = tr.Columns[2].ToPlainTextString().Trim();
                    prjName   = aTag.LinkText;
                    InfoUrl   = "http://www.uho.cn/" + aTag.Link;

                    string htmldetail = string.Empty;
                    try
                    {
                        // Download the detail page once and derive both variants from it.
                        string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);

                        // Original-case variant: source of the stored HTML fragment.
                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Replace("&nbsp;", "").Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("bordercolor", "#FFFFFF"), new TagNameFilter("table")));
                        HtmlTxt = dtnodeHTML.AsHtml();

                        // Lower-cased variant with line breaks normalised: source of the plain text.
                        htmldetail = rawDetail.ToLower().Replace("&nbsp;", "").Replace("</br>", "\r\n").Replace("<br>", "\r\n").Replace("<br/>", "\r\n");
                        htmldetail = regexHtml.Replace(htmldetail, "");
                    }
                    catch (Exception)
                    {
                        // Detail page unavailable or unparsable: skip this row.
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("bordercolor", "#ffffff"), new TagNameFilter("table")));
                    inviteCtx  = dtnode.AsString();
                    specType   = "其他";
                    msgType    = "深圳市友和保险经纪有限公司";
                    inviteType = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #17
0
        /// <summary>
        /// Parses one list page, downloads the detail page of each row and appends one
        /// <c>InviteInfo</c> per row to <paramref name="list"/>. Links on the detail page that
        /// point at the site's file-download URL are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="list">Receives the generated records.</param>
        /// <param name="html">
        /// HTML of the list page; rows are read from the first table with class "lefttable".
        /// </param>
        /// <param name="crawlAll">
        /// When <c>false</c>, processing stops once <paramref name="list"/> holds <c>MaxCount</c> records.
        /// </param>
        public void DealHtml(IList list, string html, bool crawlAll)
        {
            Parser   parserDtl = new Parser(new Lexer(html));
            NodeList aNodes    = parserDtl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));

            if (aNodes == null || aNodes.Count == 0)
            {
                return;
            }
            TableTag table = aNodes[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // Patterns are loop-invariant, so they are built once.
            Regex regexHtml      = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regexTime      = new Regex(@"时间:\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}");
            Regex regexcode      = new Regex("(工程编号|项目编号|招标编号):[^\r\n]+[\r\n]{1}");
            Regex regexBuildUnit = new Regex("(招标人|建设单位|招标采购单位):[^\r\n]+[\r\n]{1}");
            Regex regexAddress   = new Regex("(建设地点|项目地点|工程地点):[^\r\n]+[\r\n]{1}");
            Regex regexCtx       = new Regex("<!--[^<]+-->");
            Regex regDate        = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");

            // The first and the last row of the table are never read (t runs from 1 to RowCount - 2).
            for (int t = 1; t < table.RowCount - 1; t++)
            {
                string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty,
                       inviteType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty;

                TableRow tr = table.Rows[t] as TableRow;

                // Columns 1 (title) and 2 (end date) plus a link are required; skip rows without them
                // instead of failing the whole page.
                if (tr == null || tr.ColumnCount < 3)
                {
                    continue;
                }
                NodeList rowLinks = tr.SearchFor(typeof(ATag), true);
                ATag     aTag     = (rowLinks != null && rowLinks.Count > 0) ? rowLinks[0] as ATag : null;
                if (aTag == null)
                {
                    continue;
                }

                InfoUrl = aTag.Link;
                prjName = tr.Columns[1].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                endDate = tr.Columns[2].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                string htmlDtl = string.Empty;
                try
                {
                    htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                }
                catch (Exception)
                {
                    // Detail page could not be downloaded: skip this row.
                    continue;
                }
                htmlDtl = regexHtml.Replace(htmlDtl, "");
                Parser parserCtx = new Parser(new Lexer(htmlDtl));

                NodeList ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "printTb lefttable")));
                if (ctxNode != null && ctxNode.Count > 0)
                {
                    Parser   parserdiv = new Parser(new Lexer(htmlDtl));
                    NodeList aNodesdiv = parserdiv.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "biuuu_button")));

                    // string.Replace throws ArgumentException for an empty search string, so the
                    // button markup is only removed when the div was actually found.
                    string buttonHtml = aNodesdiv == null ? string.Empty : aNodesdiv.AsHtml();
                    HtmlTxt = ctxNode.AsHtml();
                    if (buttonHtml != null && buttonHtml.Length > 0)
                    {
                        HtmlTxt = HtmlTxt.Replace(buttonHtml, "");
                    }
                    HtmlTxt = HtmlTxt.Trim();

                    TableTag tabTag = ctxNode[0] as TableTag;
                    if (tabTag != null)
                    {
                        if (tabTag.RowCount > 1 && tabTag.Rows[1].ColumnCount > 0)
                        {
                            string startTime = tabTag.Rows[1].Columns[0].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                            beginDate = regexTime.Match(startTime).Value.Replace("时间:", "");
                        }

                        // Labelled fields are taken from the table's plain text; when a label is
                        // missing the match value is empty and the field stays empty.
                        string tableText = tabTag.ToPlainTextString();

                        Match match = regexcode.Match(tableText);
                        code = match.Value.Substring(match.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                        Match matchBuildUnit = regexBuildUnit.Match(tableText);
                        buildUnit = matchBuildUnit.Value.Substring(matchBuildUnit.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                        Match matchAddress = regexAddress.Match(tableText);
                        prjAddress = matchAddress.Value.Substring(matchAddress.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                        if (tabTag.RowCount > 2 && tabTag.Rows[2].ColumnCount > 0)
                        {
                            ctx = tabTag.Rows[2].Columns[0].ToPlainTextString().Replace("&nbsp;", " ").Replace("\r\n\r\n\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                            if (ctx.Length > 0)
                            {
                                // Drop HTML comments left in the text.
                                ctx = regexCtx.Replace(ctx, "");
                            }
                        }
                    }

                    // Length limits are measured in bytes of the default encoding.
                    if (Encoding.Default.GetByteCount(code) > 50)
                    {
                        code = "";
                    }
                    if (Encoding.Default.GetByteCount(buildUnit) > 150)
                    {
                        // Cut by bytes, not characters: a multi-byte string can exceed 150 bytes
                        // with fewer than 150 characters, where Substring(0, 150) would throw.
                        int keep = buildUnit.Length < 150 ? buildUnit.Length : 150;
                        buildUnit = buildUnit.Substring(0, keep);
                        while (buildUnit.Length > 0 && Encoding.Default.GetByteCount(buildUnit) > 150)
                        {
                            buildUnit = buildUnit.Substring(0, buildUnit.Length - 1);
                        }
                    }
                    if (Encoding.Default.GetByteCount(prjAddress) > 200)
                    {
                        prjAddress = "见招标公告内容";
                    }
                    if (beginDate.Length > 0 && endDate.Length > 0)
                    {
                        DateTime begin = new DateTime();
                        DateTime end   = new DateTime();
                        try
                        {
                            begin = DateTime.Parse(beginDate);
                            end   = DateTime.Parse(endDate);
                        }
                        catch (Exception)
                        {
                            // Unparsable dates: the variables keep whatever was assigned so far.
                        }
                        // An end date earlier than the begin date is discarded.
                        if (begin > end)
                        {
                            endDate = string.Empty;
                        }
                    }
                }

                // The parser has consumed the page; rewind before scanning it again.
                parserCtx.Reset();

                ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "toptd_bai")));
                // NOTE(review): this overwrites the timestamp taken from the detail table above, so
                // that value only influences the begin/end comparison — confirm this is intended.
                beginDate  = regDate.Match(ctxNode.AsString()).Value.Trim();
                inviteType = ToolHtml.GetInviteTypes(prjName);
                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠阳区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, "惠州市建设工程交易中心", inviteType, "建设工程", string.Empty, InfoUrl, HtmlTxt);
                list.Add(info);

                // Rewind again, as done before the previous scan, so the attachment scan sees the
                // whole page rather than an already-consumed parser.
                parserCtx.Reset();
                ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("target", "_blank")));
                NodeList aTagNodes = ctxNode.SearchFor(typeof(ATag), true);
                for (int a = 0; a < aTagNodes.Count; a++)
                {
                    ATag fileTage = aTagNodes[a] as ATag;
                    if (fileTage != null && fileTage.Link.Contains("http://www.ebc.huizhou.gov.cn/index/loadNewsFile"))
                    {
                        string     downloadURL = fileTage.Link;
                        BaseAttach attach      = ToolDb.GenBaseAttach(fileTage.ToPlainTextString(), info.Id, downloadURL);
                        base.AttachList.Add(attach);
                    }
                }
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return;
                }
            }
        }
コード例 #18
0
        /// <summary>
        /// Crawls the paged result list that starts at <c>SiteUrl</c> and builds one
        /// <c>BidInfo</c> per list entry whose detail page contains the content div.
        /// Attachment links found in that content are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results    = new List<BidInfo>();
            string pageHtml   = string.Empty;
            int    totalPages = 1;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return results;
            }

            // Page count: taken from the second-to-last anchor of the paging form.
            Parser   parser    = new Parser(new Lexer(pageHtml));
            NodeList pagerForm = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("name", "qPageForm")));
            if (pagerForm != null && pagerForm.Count > 0)
            {
                try
                {
                    Parser   pagerParser = new Parser(new Lexer(pagerForm.ToHtml()));
                    NodeList pagerLinks  = pagerParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (pagerLinks != null && pagerLinks.Count > 0)
                    {
                        string href     = pagerLinks[pagerLinks.Count - 2].GetATagHref();
                        string pageText = href.Replace("turnOverPage", "").Replace("(", "").Replace(")", "").Replace(";", "");
                        totalPages = int.Parse(pageText);
                    }
                }
                catch { }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                if (pageNo > 1)
                {
                    // Later pages are requested by posting the paging form fields.
                    string[] formKeys   = new string[] { "channelCode", "pageIndex", "pageSize", "pointPageIndexId" };
                    string[] formValues = new string[] { "0008", pageNo.ToString(), "15", "1" };
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(formKeys, formValues);
                    try
                    {
                        pageHtml = this.ToolWebSite.GetHtmlByUrl("http://maoming.gdgpo.com/queryMoreInfoList.do", form, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(pageHtml));
                NodeList entries = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "m_m_c_list")), true), new TagNameFilter("li")));
                if (entries == null || entries.Count == 0)
                {
                    continue;
                }

                for (int row = 0; row < entries.Count; row++)
                {
                    string endDate = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

                    ATag   titleLink = entries[row].GetATag(1);
                    string prjName   = titleLink.GetAttribute("title");
                    string beginDate = entries[row].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = "http://maoming.gdgpo.com" + titleLink.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList content = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zw_c_c_cont")));
                    if (content == null || content.Count == 0)
                    {
                        // No content div on the detail page: nothing is recorded for this entry.
                        continue;
                    }

                    string HtmlTxt = content.AsHtml().Replace("<br", "\r\n<br");
                    string bidCtx  = HtmlTxt.Replace("</p>", "\r\n").Replace("</pre>", "\r\n").ToCtxString();

                    string buildUnit = bidCtx.GetBuildRegex();
                    string code      = bidCtx.Replace("(招标编号", "000000").GetCodeRegex().GetCodeDel();
                    if (string.IsNullOrEmpty(code))
                    {
                        code = bidCtx.GetRegex("招标编号", true, 50).GetCodeDel();
                    }
                    string temp    = code.GetDateRegex("yyyy年MM月dd日");
                    string bidUnit = bidCtx.GetBidRegex().GetBidUnitDel();

                    // Amount extraction: a chain of fallbacks, each tried only while the
                    // previous attempt left the amount at "0" (or empty).
                    string bidMoney = bidCtx.GetMoneyString();
                    bidMoney = bidMoney.Contains("(")
                        ? bidMoney.Remove(bidMoney.IndexOf("(")).GetMoney()
                        : bidMoney.GetMoney();
                    if (bidMoney == "0")
                    {
                        bidMoney = bidCtx.GetMoneyString(null, true);
                        if (bidMoney.Contains("("))
                        {
                            bidMoney = bidMoney.Remove(bidMoney.IndexOf("(")).GetMoney();
                        }
                        else if (bidMoney.Contains("大写"))
                        {
                            bidMoney = bidMoney.Remove(bidMoney.IndexOf("大写")).GetMoney();
                        }
                        else
                        {
                            bidMoney = bidMoney.GetMoney();
                        }
                    }
                    if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                    {
                        bidMoney = bidCtx.GetMoneyString(null, true);
                        bidMoney = bidMoney.Contains("大写")
                            ? bidMoney.Remove(bidMoney.IndexOf("大写")).GetMoney()
                            : bidMoney.GetMoney("万元");
                    }
                    if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                    {
                        bidMoney = bidCtx.GetMoneyString(null, true).GetMoney();
                    }
                    if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                    {
                        bidMoney = bidCtx.GetMoneyString(new string[] { "¥", "$" }, false).GetMoney();
                    }
                    if (!string.IsNullOrEmpty(bidMoney) && bidMoney != "0")
                    {
                        // Values above 10000 are divided by 10000 before being stored.
                        decimal amount = decimal.Parse(bidMoney);
                        if (amount > 10000)
                        {
                            bidMoney = (amount / 10000).ToString();
                        }
                    }

                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.Replace(" ", "").GetRegex("成交人,成交候选供应商");
                    }
                    if (bidUnit.Contains("名称"))
                    {
                        bidUnit = bidUnit.Replace("名称", "");
                    }
                    bidUnit = bidUnit.Replace("-", "");

                    string bidType  = prjName.GetInviteBidType();
                    string msgType  = "茂名市政府采购";
                    string specType = "政府采购";

                    BidInfo info = ToolDb.GenBidInfo("广东省", "茂名市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);

                    // Collect attachment links from the stored HTML fragment.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int k = 0; k < anchors.Count; k++)
                        {
                            ATag fileAtag = anchors[k].GetATag();
                            if (!fileAtag.IsAtagAttach())
                            {
                                continue;
                            }

                            string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                            string fileLink = fileAtag.Link;
                            if (!fileLink.ToLower().Contains("http"))
                            {
                                fileLink = "http://maoming.gdgpo.gov.cn" + fileAtag.Link;
                            }
                            // Links of 500 bytes or more (default encoding) are not stored.
                            if (Encoding.Default.GetByteCount(fileLink) < 500)
                            {
                                base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                            }
                        }
                    }

                    results.Add(info);
                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return results;
                    }
                }
            }
            return results;
        }
コード例 #19
0
        /// <summary>
        /// Crawls the paged announcement grid at <c>SiteUrl</c> and builds one
        /// <c>InviteInfo</c> per grid row whose detail page contains the content cell.
        /// Attachment links found in that content are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results    = new List<InviteInfo>();
            int    totalPages = 1;
            string pageHtml   = string.Empty;
            string viewState  = string.Empty;
            string cookiestr  = string.Empty;

            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                return results;
            }

            // Page count: the text between "总页数" and "当前页" in the pager cell.
            Parser   parser    = new Parser(new Lexer(pageHtml));
            NodeList pagerCell = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
            if (pagerCell != null && pagerCell.Count > 0)
            {
                try
                {
                    string pageText = pagerCell.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    totalPages = int.Parse(pageText);
                }
                catch { }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                if (pageNo > 1)
                {
                    // Later pages are requested by posting back the pager event together with
                    // the view state of the most recently downloaded page.
                    viewState = this.ToolWebSite.GetAspNetViewState(pageHtml);
                    string[] formKeys   = new string[] { "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" };
                    string[] formValues = new string[] { viewState, "MoreInfoList1$Pager", pageNo.ToString() };
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(formKeys, formValues);
                    try
                    {
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.Default, ref cookiestr);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(pageHtml));
                NodeList grid = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (grid == null || grid.Count == 0)
                {
                    continue;
                }

                TableTag table = grid[0] as TableTag;
                for (int row = 0; row < table.RowCount; row++)
                {
                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    TableRow tr        = table.Rows[row];
                    ATag     titleLink = tr.Columns[1].GetATag();
                    string   prjName   = titleLink.GetAttribute("title");
                    string   beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string   InfoUrl   = "http://www.dyggzyjyzx.com" + titleLink.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList content = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (content == null || content.Count == 0)
                    {
                        // No content cell on the detail page: nothing is recorded for this row.
                        continue;
                    }

                    string HtmlTxt    = content.AsHtml();
                    string inviteCtx  = HtmlTxt.ToCtxString();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string buildUnit  = inviteCtx.GetBuildRegex();

                    // Trim trailing text from the extracted unit name: keep up to and including
                    // the first "公司", then cut at "地址" and at "招标代理" when present.
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    if (buildUnit.Contains("招标代理"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                    }

                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    string msgType    = "大冶市公共资源交易中心";
                    string specType   = "建设工程";
                    string inviteType = prjName.GetInviteBidType();
                    buildUnit = buildUnit.Replace(" ", "");

                    InviteInfo info = ToolDb.GenInviteInfo("湖北省", "湖北省及地市", "大冶市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    results.Add(info);

                    // Collect attachment links from the stored HTML fragment.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchors != null && anchors.Count > 0)
                    {
                        for (int k = 0; k < anchors.Count; k++)
                        {
                            ATag anchor = anchors[k] as ATag;
                            if (!anchor.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not contain "http" are prefixed with the site root.
                            string link = anchor.Link.ToLower().Contains("http")
                                ? anchor.Link
                                : "http://www.dyggzyjyzx.com/" + anchor.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return results;
                    }
                }
            }
            return results;
        }
コード例 #20
0
ファイル: InviteMeiZhoouJS.cs プロジェクト: SHNXJMG/Small
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
                Regex regexHtml = new Regex(@"<script[^<]*</script>");
                htl = regexHtml.Replace(htl, "");
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser    = new Parser(new Lexer(htl));
            NodeList nodeList  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "right")));
            Regex    regexPage = new Regex(@"共\d+页");

            try
            {
                page = int.Parse(regexPage.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' }));
            }
            catch (Exception)
            { }
            for (int i = 1; i < page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&otype=&pageNum=" + i.ToString()), Encoding.Default);
                        Regex regexHtml = new Regex(@"<script[^<]*</script>");
                        htl = regexHtml.Replace(htl, "");
                    }
                    catch (Exception ex) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "1")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = (TableTag)tableNodeList[0];
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j];
                        prjName   = tr.Columns[0].ToPlainTextString().Trim();
                        beginDate = tr.Columns[1].ToPlainTextString().Replace("&nbsp; ", "").Trim().Substring(0, 10);
                        ATag aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[0] as ATag;
                        InfoUrl = "http://market.meizhou.gov.cn/website/deptwebsite/1925/Content.jsp?issueId=15488&msgType=00&filePath=" + aTag.GetAttribute("onclick").Replace("showDeptContent('1925','", "");
                        int    ii = InfoUrl.IndexOf("'");
                        string oo = InfoUrl.Remove(ii).Trim();
                        if (oo.Contains("content.php"))
                        {
                            string url = aTag.GetAttribute("onclick").Replace("showDeptContent('1925','", "");
                            ii = url.IndexOf("'");
                            oo = url.Remove(ii).ToString();
                        }
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(oo), Encoding.Default).Replace("&nbsp;", "");
                            Regex regexHtml = new Regex(@"<script[^<]*</script>");
                            htmldetail = regexHtml.Replace(htmldetail, "");
                        }
                        catch (Exception)
                        {
                            Logger.Error("InviteMeiZhoouJS");
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("P"), new HasAttributeFilter("class", "MsoNormal")));
                        if (dtnode == null || dtnode.Count < 1)
                        {
                            parserdetail = new Parser(new Lexer(htmldetail));
                            dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "WordSection1")));
                        }
                        if (dtnode.Count > 0 && dtnode != null)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            for (int k = 0; k < dtnode.Count; k++)
                            {
                                string tr1 = string.Empty;
                                tr1 = dtnode[k].ToPlainTextString().Replace(" ", "").Trim();
                                if (k == 0)
                                {
                                    string InvType = tr1;
                                    inviteType = ToolHtml.GetInviteTypes(InvType);
                                }
                                inviteCtx += tr1 + ":" + "\r\n";
                            }
                            Regex regPrjAddr = new Regex(@"(工程地点|建设地点):[^\r\n]+\r\n");
                            prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("建设地点", "").Replace(":", "").Replace(";", "").Trim();

                            Regex bildUnit = new Regex(@"(招标人|招标人(盖章)|招标人):[^\r\n]+[\r\n]{1}");
                            buildUnit = bildUnit.Match(inviteCtx).Value.Replace("招  标人:", "").Replace("招标人(盖章):", "").Replace("招标人:", "").Trim();
                            if (buildUnit != "" && buildUnit.Contains(":"))
                            {
                                int zz = buildUnit.IndexOf(":");
                                buildUnit = buildUnit.Remove(zz).ToString();
                            }
                            Regex regcode = new Regex(@"(招标项目编号|项目编号)(:|:)[^\r\n]+[\r\n]{1}");
                            code = regcode.Match(inviteCtx).Value.Replace("招标项目编号", "").Replace("项目编号", "").Replace(":", "").Replace(":", "").Trim();

                            Regex  regoType = new Regex(@"工程类型:[^\r\n]+\r\n");
                            string oType    = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                            if (oType.Contains("房建"))
                            {
                                otherType = "房建及工业民用建筑";
                            }
                            else if (oType.Contains("市政"))
                            {
                                otherType = "市政工程";
                            }
                            else if (oType.Contains("园林绿化"))
                            {
                                otherType = "园林绿化工程";
                            }
                            else if (oType.Contains("装饰") || oType.Contains("装修"))
                            {
                                otherType = "装饰装修工程";
                            }
                            else if (oType.Contains("电力"))
                            {
                                otherType = "电力工程";
                            }
                            else if (oType.Contains("水利"))
                            {
                                otherType = "水利工程";
                            }
                            if (oType.Contains("环保"))
                            {
                                otherType = "环保工程";
                            }
                            if (buildUnit == "")
                            {
                                buildUnit = "";
                            }
                            if (buildUnit.Contains("梅州市建设工程交易中心"))
                            {
                                buildUnit = "";
                            }
                            msgType   = "梅州市建设工程交易中心";
                            specType  = "建设工程";
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = ns0 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("?xml:namespaceprefix=o/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("<?xml:namespaceprefix=st1/>", "").Trim();
                            if (Encoding.Default.GetByteCount(code) >= 50)
                            {
                                code = string.Empty;
                            }
                            if (Encoding.Default.GetByteCount(prjAddress) >= 150)
                            {
                                prjAddress = string.Empty;
                            }
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "梅州市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, oo, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
コード例 #21
0
ファイル: NotifyInfoBeiJingYl.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the notice list at <c>SiteUrl</c> page by page, downloads each notice's detail page
        /// and saves the notice, its inline images and its attachment links through <c>ToolDb</c>.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> notices have been generated.</param>
        /// <returns>
        /// The result list, which this method never adds to because notices are saved directly;
        /// <c>null</c> when the <c>MaxCount</c> limit ends the crawl early.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <NotifyInfo>();
            int    pageInt = 1, sqlCount = 0;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string cookiestr       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "gridview_PagerRow")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text between "/" and "&nbsp" is parsed as the total page count.
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "&nbsp");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: keep the default of a single page when the pager cannot be parsed.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Post the form back using the state of the page fetched last.
                    // NOTE(review): the ".x"/".y" fields look like image-button click coordinates
                    // for a "next page" control - confirm against the site.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "__EVENTTARGET",
                                       "__EVENTARGUMENT",
                                       "__LASTFOCUS",
                                       "__VIEWSTATE",
                                       "__VIEWSTATEGENERATOR",
                                       "__EVENTVALIDATION",
                                       "keyTextBox",
                                       "PagerControl1:_ctl4",
                                       "PagerControl1:_ctl2.x",
                                       "PagerControl1:_ctl2.y" },
                        new string[] {
                        "", "", "",
                        viewState,
                        "7CE136E4",
                        eventValidation,
                        "",
                        "",
                        "3", "5"
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MyGridView1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    // Row 0 is skipped (presumably the grid header - confirm).
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty;

                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        headName    = aTag.LinkText;
                        releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        infoUrl     = aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "PopupBody_context")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            // Titles longer than 200 bytes in the default encoding are cut to 100 characters.
                            if (Encoding.Default.GetByteCount(headName) > 200)
                            {
                                headName = headName.Substring(0, 100);
                            }
                            ctxHtml = dtlNode.AsHtml();
                            infoCtx = ctxHtml.ToCtxString();

                            // Collect absolute image URLs and rewrite the stored HTML to point at them.
                            List <string> listImg = new List <string>();
                            parser = new Parser(new Lexer(ctxHtml));
                            NodeList imgNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                            if (imgNode != null && imgNode.Count > 0)
                            {
                                for (int m = 0; m < imgNode.Count; m++)
                                {
                                    string imageUrl = (imgNode[m] as ImageTag).ImageURL;
                                    string link     = "http://publish.bcactc.com" + imageUrl;
                                    listImg.Add(link);
                                    ctxHtml = ctxHtml.GetReplace(imageUrl, link);
                                }
                            }

                            msgType = "北京市建设工程发包承包交易中心";
                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "北京市", "北京市区", "", infoCtx, "通知公告");
                            sqlCount++;
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                for (int a = 0; a < listImg.Count; a++)
                                {
                                    try
                                    {
                                        // Index with the loop variable so every image is saved,
                                        // not the first one over and over.
                                        BaseAttach entity = ToolHtml.GetBaseAttach(listImg[a], headName, info.Id);
                                        if (entity != null)
                                        {
                                            ToolDb.SaveEntity(entity, string.Empty);
                                        }
                                    }
                                    catch
                                    {
                                        // Best effort: a failed image must not abort the remaining attachments.
                                    }
                                }

                                // Save every anchor in the notice body that qualifies as an attachment.
                                parser = new Parser(new Lexer(ctxHtml));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag fileATag = aNode[k].GetATag();
                                        if (fileATag.IsAtagAttach())
                                        {
                                            BaseAttach obj = null;
                                            try
                                            {
                                                if (fileATag.Link.ToLower().Contains("http"))
                                                {
                                                    obj = ToolHtml.GetBaseAttach(fileATag.Link, headName, info.Id);
                                                }
                                                else
                                                {
                                                    // Relative links are resolved against the publishing host.
                                                    obj = ToolHtml.GetBaseAttach("http://publish.bcactc.com/" + fileATag.Link, headName, info.Id);
                                                }
                                            }
                                            catch
                                            {
                                                // Best effort: skip attachments that cannot be fetched.
                                            }
                                            if (obj != null)
                                            {
                                                ToolDb.SaveEntity(obj, string.Empty);
                                            }
                                        }
                                    }
                                }
                            }
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                // NOTE(review): returns null rather than the list on this path; kept
                                // unchanged because callers may depend on it - confirm.
                                return(null);
                            }
                        }
                    }
                }
            }
            return(list);
        }
コード例 #22
0
        /// <summary>
        /// Walks the paged tender list at <c>SiteUrl</c>, opens the detail page behind every list row
        /// and builds one <c>InviteInfo</c> per row whose detail page contains the "Lb_nr" span.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl ends as soon as <c>MaxCount</c> entries are collected.</param>
        /// <returns>The collected entries; empty when the first list page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results  = new List <InviteInfo>();
            string cookie   = string.Empty;
            string listHtml = string.Empty;
            int    lastPage = 1;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookie);
            }
            catch
            {
                return(results);
            }

            // Anchors nested inside <div id="pager">; the number in the last one's href is the page count.
            AndFilter pagerDivFilter = new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pager"));
            NodeList  pagerLinks     = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
                new AndFilter(new HasParentFilter(pagerDivFilter, true), new TagNameFilter("a")));

            bool pagerFound = pagerLinks != null && pagerLinks.Count > 0;
            if (pagerFound)
            {
                try
                {
                    string lastHref = pagerLinks[pagerLinks.Count - 1].GetATagHref().Replace("&#39;", "");
                    lastPage = int.Parse(Regex.Match(lastHref, @"[0-9]+").Value);
                }
                catch
                {
                    // Page count stays at 1 when the pager cannot be read.
                }
            }

            for (int pageNo = 1; pageNo <= lastPage; pageNo++)
            {
                if (pageNo > 1)
                {
                    // Later pages are requested by posting the form state of the page fetched last.
                    string stateField      = this.ToolWebSite.GetAspNetViewState(listHtml);
                    string validationField = this.ToolWebSite.GetAspNetEventValidation(listHtml);

                    string[] formKeys =
                    {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "hsa1$DD_LX",
                        "hsa1$wd",
                        "pager_input"
                    };
                    string[] formValues =
                    {
                        stateField,
                        "pager",
                        pageNo.ToString(),
                        "",
                        validationField,
                        "综合搜索",
                        "",
                        (pageNo - 1).ToString()
                    };
                    NameValueCollection postData = this.ToolWebSite.GetNameValueCollection(formKeys, formValues);

                    try
                    {
                        listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, postData, Encoding.UTF8, ref cookie);
                    }
                    catch
                    {
                        continue;
                    }
                }

                NodeList gridNodes = new Parser(new Lexer(listHtml)).ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GV1")));
                if (gridNodes == null || gridNodes.Count <= 0)
                {
                    continue;
                }

                TableTag grid = gridNodes[0] as TableTag;
                // The first and the last row of the grid are not treated as data rows.
                for (int rowIdx = 1; rowIdx < grid.RowCount - 1; rowIdx++)
                {
                    TableRow row = grid.Rows[rowIdx];

                    string title = row.Columns[0].ToNodePlainString();
                    if (title.Contains("["))
                    {
                        // Remove the bracketed prefix from the title.
                        string area = title.Replace("[", "【").Replace("]", "】").GetRegexBegEnd("【", "】");
                        title = title.Replace("[" + area + "]", "");
                    }
                    string publishDate = row.Columns[1].ToPlainTextString();
                    string detailUrl   = "http://www.ycsggzy.cn/" + row.Columns[0].GetATagHref().Replace("&amp;", "&");

                    string detailHtml;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    NodeList bodyNodes = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "Lb_nr")));
                    if (bodyNodes == null || bodyNodes.Count <= 0)
                    {
                        continue;
                    }

                    string bodyHtml   = bodyNodes.AsHtml().GetReplace("<br>", "<br />");
                    string bodyText   = bodyHtml.ToCtxString();
                    string code       = bodyText.GetCodeRegex();
                    string buildUnit  = bodyText.GetBuildRegex();
                    string address    = bodyText.GetAddressRegex().Replace(" ", "");
                    string inviteType = title.GetInviteBidType();

                    // End date, remark and other-type are not extracted for this site and stay empty.
                    InviteInfo info = ToolDb.GenInviteInfo(
                        "宁夏回族自治区", "宁夏回族自治区及地市", "银川市", string.Empty,
                        code, title, address, buildUnit,
                        publishDate, string.Empty, bodyText, string.Empty,
                        "银川市公共资源交易中心", inviteType, "建设工程", string.Empty,
                        detailUrl, bodyHtml);
                    results.Add(info);

                    bool limitReached = !crawlAll && results.Count >= this.MaxCount;
                    if (limitReached)
                    {
                        return(results);
                    }
                }
            }
            return(results);
        }
コード例 #23
0
ファイル: ItemPlanSz.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the paged list in the "arContent" table under <c>SiteUrl</c>, follows each row to its
        /// detail page and builds one <c>ItemPlan</c> per row whose detail page has a "detail" div.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl stops as soon as <c>MaxCount</c> items are collected.</param>
        /// <returns>The collected <c>ItemPlan</c> entries; empty when the first list page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "arContent")));

            if (pageNode != null && pageNode.Count > 0)
            {
                TableTag pageTable = pageNode[0] as TableTag;
                // Guard the row lookup: a table without rows (or a failed cast) must fall back to
                // a single page instead of throwing and aborting the whole crawl.
                if (pageTable != null && pageTable.RowCount > 0)
                {
                    // The last row holds a createPageHTML(...) call; once its fixed parts are
                    // stripped, what is left is parsed as the page count.
                    string temp = pageTable.Rows[pageTable.RowCount - 1].ToNodePlainString().Replace("createPageHTML", "").Replace("0,", "").Replace("(", "").Replace(")", "").Replace("index", "").Replace("htm", "").Replace(",", "").Replace("\"", "").Replace(";", "").Trim();
                    int parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page i is requested as index_{i-1}.htm.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "/index_" + (i - 1).ToString() + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "arContent")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    // Row 0 and the last row (the pager row read above) are not data rows.
                    for (int j = 1; j < table.RowCount - 1; j++)
                    {
                        TableRow tr = table.Rows[j];
                        string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                        ATag aTag = tr.Columns[1].GetATag();
                        ItemName = aTag.GetAttribute("title");
                        ItemCode = tr.Columns[2].ToNodePlainString();
                        PlanDate = tr.Columns[3].ToPlainTextString().GetDateRegex();

                        InfoUrl = this.SiteUrl + aTag.Link.Replace("../", "").Replace("./", "");
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            CtxHtml = dtlNode.AsHtml();
                            ItemCtx = CtxHtml.ToCtxString();

                            // Flatten the detail table into "label:value\r\n" lines: even columns are
                            // written as labels, odd columns as values.
                            StringBuilder ctx = new StringBuilder();
                            parser = new Parser(new Lexer(CtxHtml));
                            NodeList dtlTable = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                            if (dtlTable != null && dtlTable.Count > 0)
                            {
                                TableTag tableTag = dtlTable[0] as TableTag;
                                for (int k = 0; k < tableTag.RowCount; k++)
                                {
                                    TableRow dtlRow = tableTag.Rows[k];
                                    for (int c = 0; c < dtlRow.ColumnCount; c++)
                                    {
                                        if (c % 2 == 0)
                                        {
                                            // NOTE(review): the two Replace calls are identical as written;
                                            // one was presumably meant for a full-width colon - confirm.
                                            ctx.Append(dtlRow.Columns[c].ToNodePlainString().Replace(":", "").Replace(":", "")).Append(":");
                                        }
                                        else
                                        {
                                            ctx.Append(dtlRow.Columns[c].ToNodePlainString()).Append("\r\n");
                                        }
                                    }
                                }
                            }

                            MsgUnit = ctx.ToString().GetRegex("发布单位");
                            if (string.IsNullOrEmpty(MsgUnit))
                            {
                                // Fall back to a fixed publisher name when none is found in the table.
                                MsgUnit = "发改委";
                            }
                            PlanType = "项目审批信息";
                            MsgType  = "深圳市发展和改革委员会";

                            ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
コード例 #24
0
ファイル: NoticeGzgggs.cs プロジェクト: SHNXJMG/Small
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <NoticeInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception ex)
            {
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination page-mar")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/共", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "wsbs-table")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string InfoTitle = string.Empty, InfoType = string.Empty, PublistTime = string.Empty, InfoCtx = string.Empty, InfoUrl = string.Empty, prjCode = string.Empty, buildUnit = string.Empty, htmlTxt = string.Empty;

                        TableRow tr = table.Rows[j];
                        InfoTitle   = tr.Columns[1].ToNodePlainString();
                        InfoType    = "变更公示";
                        PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();

                        InfoUrl = "http://www.gzggzy.cn" + tr.Columns[1].GetATagHref();
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch
                        { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "xx-main")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            htmlTxt = dtlNode.AsHtml().GetJsString().Replace("&lt;", "<").Replace("&gt;", ">").Replace("&quot;", "\"").Replace("&amp;", "&").Replace("&lquot;", "").Replace("&rdquo;", "");
                            InfoCtx = htmlTxt.Replace("</p>", "").Replace("<br/>", "").Replace("<br>", "").ToCtxString().Replace("&plusmn;", "").Replace("&ldquot;", "").Replace("&ldquo;", "");
                            prjCode = InfoCtx.GetCodeRegex().GetChina().GetCodeDel().Replace("&ltbr", "").Replace("/&gt", "");

                            buildUnit = InfoCtx.GetBuildRegex(null, true, 100);
                            if (buildUnit.Contains("联系"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                            }
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "广州政府采购", string.Empty, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "广州公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, string.Empty, string.Empty, htmlTxt);
                            parser = new Parser(new Lexer(htmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag aTag = aNode[k].GetATag();
                                    if (aTag.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (aTag.Link.ToLower().Contains("http"))
                                        {
                                            link = aTag.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.gzggzy.cn" + aTag.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(aTag.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }

                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
コード例 #25
0
        /// <summary>
        /// Crawls the listing pages that start at <c>SiteUrl</c> and builds one
        /// <c>InviteInfo</c> for every listing row whose detail page contains a
        /// <c>div</c> with class <c>article_body</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far. The list is empty when the first listing page cannot
        /// be downloaded and partial when a later listing page fails to download.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "easysite-total-page")));

            if (nodeList != null && nodeList.Count > 0)
            {
                string temp = nodeList.AsString();
                try
                {
                    // Presumably the pager text reads "1/<total>" followed by a line break.
                    page = int.Parse(temp.GetRegexBegEnd("1/", "\n"));
                }
                catch
                {
                    // Best effort: an unreadable pager leaves page at 1, which triggers the fallback below.
                }
            }
            if (page == 1)
            {
                // The page count is unknown (or really 1): probe up to 42 pages. The loop
                // below returns at the first listing page that fails to download.
                page = 42;
            }

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl("http://www.szgm.gov.cn/szgm/132100/xwdt17/135204/151246/8d25503a-" + i.ToString() + ".html", Encoding.UTF8);
                    }
                    catch { return(list); }
                }

                parser = new Parser(new Lexer(htl));
                NodeList tabList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "0"))), new TagNameFilter("tr")));
                if (tabList == null || tabList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < tabList.Count; j++)
                {
                    ATag     aTag = null;
                    TableRow tr   = null;
                    try
                    {
                        // The filter selects <tr> nodes, so the node is the row itself. The old
                        // code cast it to TableTag, which is always null for a row; the resulting
                        // NullReferenceException was swallowed and every row was skipped.
                        tr = tabList[j] as TableRow;
                        if (tr == null)
                        {
                            // Backward-compatible fallback for a node that is a table: use its first row.
                            TableTag rowTable = tabList[j] as TableTag;
                            if (rowTable != null && rowTable.RowCount > 0)
                            {
                                tr = rowTable.Rows[0];
                            }
                        }
                        if (tr == null)
                        {
                            continue;
                        }

                        aTag = tr.GetATag();
                        if (aTag == null || tr.ColumnCount != 3)
                        {
                            continue;
                        }
                    }
                    catch { continue; }

                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string prjName   = aTag.GetAttribute("title");
                    string InfoUrl   = "http://www.szgm.gov.cn" + aTag.Link;

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article_body")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtlNode.AsHtml();
                    string inviteCtx = HtmlTxt.ToCtxString();

                    string code      = inviteCtx.GetCodeRegex().GetCodeDel();
                    string buildUnit = inviteCtx.GetBuildRegex();
                    // NOTE(review): the address is filled with the same extractor as buildUnit;
                    // this looks like a copy/paste slip — confirm whether an address-specific
                    // extractor should be used here.
                    string prjAddress = inviteCtx.GetBuildRegex();
                    string inviteType = prjName.GetInviteBidType();
                    string specType   = "政府采购";
                    string msgType    = "深圳市光明新区";
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        // No build unit found in the text: fall back to the publishing source name.
                        buildUnit = msgType;
                    }

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "光明新区",
                                                           string.Empty, code, prjName, prjAddress, buildUnit,
                                                           beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #26
0
ファイル: NoticeJiLinGgzy.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c> and builds one <c>NoticeInfo</c> per
        /// six-column listing row whose category cell names one of the accepted project
        /// types. Attachment links found in the notice body are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The collected records; empty when the first listing page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<NoticeInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Without the first page there is nothing to parse; return the empty list
                // explicitly instead of running the parser over an empty document.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager means only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Subsequent pages are requested by posting the ASP.NET form state of the
                    // most recently downloaded page back to the listing URL.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        "Pager",
                        i.ToString(),
                        "",
                        viewState,
                        eventValidation
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("tr"));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    // Guard the cast: a node that is not a table row is skipped instead of
                    // aborting the whole crawl with a NullReferenceException.
                    TableRow tr = listNode[j] as TableRow;
                    if (tr == null || tr.ColumnCount != 6)
                    {
                        continue;
                    }
                    ATag aTag = tr.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    // Only rows whose bracketed category names one of these project types are kept.
                    string prjType = tr.Columns[2].ToNodePlainString().GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    if (!prjType.Contains("水利工程") && !prjType.Contains("建设工程") && !prjType.Contains("交通工程"))
                    {
                        continue;
                    }

                    string InfoType    = "变更公示";
                    string InfoTitle   = aTag.GetAttribute("title");
                    string InfoUrl     = "http://ggzyjy.jl.gov.cn/JiLinZtb/" + aTag.Link.GetReplace("../,./");
                    string area        = tr.Columns[3].ToNodePlainString().GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    string PublistTime = tr.Columns[4].ToPlainTextString().GetDateRegex();

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt   = dtlNode.AsHtml();
                    string InfoCtx   = htmlTxt.GetReplace("</p>,<br />,<br/>", "\r\n").ToCtxString();
                    string prjCode   = InfoCtx.GetCodeRegex().GetCodeDel();
                    string buildUnit = InfoCtx.GetBuildRegex();

                    // Trim text that trails the organisation name in the extracted value.
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("联系"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }

                    NoticeInfo info = ToolDb.GenNoticeInfo("吉林省", "吉林省及地市", area, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "吉林省公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "建设工程", prjType, htmlTxt);
                    list.Add(info);

                    // Collect attachment links from the notice body.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not contain "http" are resolved against the site root.
                            string link = string.Empty;
                            if (a.Link.ToLower().Contains("http"))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://ggzyjy.jl.gov.cn/" + a.Link;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #27
0
        /// <summary>
        /// Crawls the listing table (class <c>lefttable</c>) at <c>SiteUrl</c>, builds a
        /// <c>NotifyInfo</c> for each row whose detail page contains <c>div#context_div</c>
        /// and hands it to <c>ToolDb.SaveEntity</c>. When that call returns <c>true</c>, the
        /// images and attachment links found in the body are saved as well.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops once <c>MaxCount</c> records have been processed.
        /// </param>
        /// <returns>Always <c>null</c>; records are saved directly instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            // Determine the page count.
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The last pager link carries the page count in parentheses; the
                    // parentheses are swapped for markers so the value between them can be extracted.
                    string temp = pageList[pageList.Count - 1].GetATagValue().Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }

            // NOTE(review): nothing inside this loop downloads listing page i, so every
            // iteration re-parses the HTML of the first page. The request for subsequent
            // pages appears to be missing; the site's paging parameters are not visible
            // here, so the loop is left unchanged — confirm and restore the page request.
            for (int i = 1; i <= pageInt; i++)
            {
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row of the table are not visited.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; shorter rows cannot be listing entries.
                        continue;
                    }

                    string infoScorce  = string.Empty;
                    string infoType    = "办事指南";
                    string headName    = tr.Columns[1].ToNodePlainString();
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl     = tr.Columns[1].GetATagHref();

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString().Replace("<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />", "");
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "context_div")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = dtlList.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = MsgTypeCosnt.HuiZhouMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "惠州市区", string.Empty, infoCtx, infoType);

                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    sqlCount++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Save every image referenced by the notice body.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgList != null && imgList.Count > 0)
                    {
                        for (int m = 0; m < imgList.Count; m++)
                        {
                            ImageTag imgTag = imgList[m] as ImageTag;
                            if (imgTag == null)
                            {
                                continue;
                            }
                            BaseAttach baseInfo = ToolHtml.GetBaseAttachByUrl(imgTag.GetAttribute("src"), headName, info.Id);
                            if (baseInfo != null)
                            {
                                ToolDb.SaveEntity(baseInfo, string.Empty);
                            }
                        }
                    }

                    // Save every attachment link found in the notice body.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList attachList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (attachList != null && attachList.Count > 0)
                    {
                        for (int a = 0; a < attachList.Count; a++)
                        {
                            ATag aTag = attachList[a] as ATag;
                            if (aTag == null || !aTag.IsAtagAttach())
                            {
                                continue;
                            }
                            BaseAttach obj = ToolHtml.GetBaseAttachByUrl(aTag.Link, aTag.LinkText, info.Id);
                            if (obj != null)
                            {
                                ToolDb.SaveEntity(obj, string.Empty);
                            }
                        }
                    }
                }
            }
            return(null);
        }
コード例 #28
0
ファイル: BidShanXiJsgc.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the paged listing table <c>MoreInfoList1_DataGrid1</c> at <c>SiteUrl</c>
        /// and builds one <c>BidInfo</c> per row whose detail page contains
        /// <c>td#TDContent</c>. When no winning bidder is found in the plain text, the first
        /// table of the detail body is flattened into "key:value" lines and searched again.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The collected records; empty when the first listing page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<BidInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager means only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Subsequent pages are requested by posting the ASP.NET form state of the
                    // most recently downloaded page back to the listing URL.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION",
                        "MoreInfoList1$txtTitle"
                    },
                        new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString(),
                        eventValidation,
                        ""
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string endDate = string.Empty, otherType = string.Empty, area = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; a shorter row would otherwise abort the crawl.
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // A row without a link (e.g. a header row) has no detail page to follow.
                        continue;
                    }

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();

                    // The area is the text between the first '[' and the next ']' of the title cell.
                    // Searching for ']' from the position of '[' avoids a negative Substring length
                    // when a ']' happens to precede the '['.
                    string temp       = tr.Columns[1].ToNodePlainString();
                    int    openIndex  = temp.IndexOf('[');
                    int    closeIndex = openIndex < 0 ? -1 : temp.IndexOf(']', openIndex);
                    if (closeIndex > openIndex)
                    {
                        area = temp.Substring(openIndex, closeIndex - openIndex).GetReplace("[,]");
                    }

                    string InfoUrl = "http://www.sxszbb.com" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtlNode.AsHtml();
                    string bidCtx  = HtmlTxt.ToCtxString();

                    string buildUnit = bidCtx.GetBuildRegex();
                    string code      = bidCtx.GetCodeRegex().GetCodeDel();
                    string bidMoney  = bidCtx.GetMoneyRegex();
                    string bidUnit   = bidCtx.GetBidRegex();
                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegex("第一名");
                    }
                    string prjMgr = bidCtx.GetMgrRegex();

                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        // Fallback: flatten the first table of the body into "key:value" lines
                        // and run the extractors over that text instead.
                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        TableTag tag = (tableNode != null && tableNode.Count > 0) ? tableNode[0] as TableTag : null;
                        if (tag != null)
                        {
                            string ctx = string.Empty;
                            // isBreak: a cell containing "评标结果" has been seen.
                            // rBreak:  the row following that cell has been consumed, so scanning stops.
                            bool isBreak = false, rBreak = false;
                            for (int r = 0; r < tag.RowCount; r++)
                            {
                                for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                {
                                    string strTemp = tag.Rows[r].Columns[c].ToNodePlainString();
                                    if (strTemp.Contains("评标结果"))
                                    {
                                        isBreak = true;
                                        break;
                                    }
                                    if (isBreak)
                                    {
                                        // After the marker, each cell of this row is a key and
                                        // the cell directly below it is the value.
                                        rBreak = true;
                                        try
                                        {
                                            ctx += tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:") + ":";
                                            ctx += tag.Rows[r + 1].Columns[c].ToNodePlainString().GetReplace(":,:") + "\r\n";
                                        }
                                        catch
                                        {
                                            // No row or cell below: keep whatever was appended so far.
                                        }
                                    }
                                    else
                                    {
                                        // Before the marker, cells alternate key, value within a row.
                                        if ((c + 1) % 2 == 0)
                                        {
                                            ctx += strTemp.GetReplace(":,:") + "\r\n";
                                        }
                                        else
                                        {
                                            ctx += strTemp.GetReplace(":,:") + ":";
                                        }
                                    }
                                }
                                if (rBreak)
                                {
                                    break;
                                }
                            }

                            bidUnit = ctx.GetBidRegex();
                            if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = ctx.GetMoneyRegex();
                            }
                            if (string.IsNullOrWhiteSpace(prjMgr))
                            {
                                prjMgr = ctx.GetMgrRegex();
                            }
                            if (string.IsNullOrWhiteSpace(buildUnit))
                            {
                                buildUnit = ctx.GetBuildRegex();
                            }
                            if (string.IsNullOrWhiteSpace(code))
                            {
                                code = ctx.GetCodeRegex().GetCodeDel();
                            }
                        }
                    }

                    // Discard or trim extracted values that contain these marker words.
                    if (buildUnit.Contains("单位章"))
                    {
                        buildUnit = string.Empty;
                    }
                    if (buildUnit.Contains("联系人"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系人"));
                    }
                    if (prjMgr.Contains("中标"))
                    {
                        prjMgr = string.Empty;
                    }

                    string bidType  = "建设工程";
                    string specType = bidType;
                    string msgType  = "陕西省建设工程招标投标管理办公室";
                    BidInfo info = ToolDb.GenBidInfo("陕西省", "陕西省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #29
0
ファイル: InviteFszfcg.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the notice list starting at <c>SiteUrl</c>, opens the detail page of every
        /// list entry and builds one <c>InviteInfo</c> for each detail page that contains a
        /// <c>div</c> with class <c>zw_c_c_cont</c>. Attachment links found inside that block
        /// are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the result list holds
        /// <c>MaxCount</c> entries; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records. The list is empty when the first list
        /// page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("name", "qPageForm")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    NodeList aNode = new Parser(new Lexer(pageNode.ToHtml())).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        // The href of the second-to-last pager link is expected to look like
                        // "turnOverPage(N);"; N is used as the number of pages to visit.
                        string temp = aNode[aNode.Count - 2].GetATagHref().Replace("turnOverPage", "").Replace("(", "").Replace(")", "").Replace(";", "");
                        pageInt = int.Parse(temp);
                    }
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed (too few links, non-numeric
                    // argument, ...) pageInt keeps its default of 1 and only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested from the query endpoint with the page index as a form value.
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "channelCode", "pageIndex", "pageSize", "pointPageIndexId" }, new string[] {
                        "0005", i.ToString(), "15", "1"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://foshan.gdgpo.com/queryMoreInfoList.do", nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // A page that fails to download is skipped; the remaining pages are still tried.
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "m_m_c_list")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        ATag aTag = listNode[j].GetATag(1);
                        if (aTag == null)
                        {
                            // A list item without the expected anchor cannot be followed;
                            // skip it instead of aborting the whole crawl with a null dereference.
                            continue;
                        }
                        prjName   = aTag.GetAttribute("title");
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();

                        InfoUrl = "http://foshan.gdgpo.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zw_c_c_cont")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.AsHtml();
                            inviteCtx = HtmlTxt.ToCtxString();
                            code      = inviteCtx.GetCodeRegex();
                            // Cut the extracted code at the first escaped "<" or opening parenthesis.
                            if (code.Contains("&lt"))
                            {
                                code = code.Remove(code.IndexOf("&lt"));
                            }
                            if (code.Contains("("))
                            {
                                code = code.Remove(code.IndexOf("("));
                            }
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            inviteType = prjName.GetInviteBidType();
                            msgType    = "佛山市政府采购";
                            specType   = "政府采购";
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "佛山市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int k = 0; k < fileNode.Count; k++)
                                {
                                    ATag fileAtag = fileNode[k].GetATag();
                                    if (fileAtag.IsAtagAttach())
                                    {
                                        string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                                        string fileLink = fileAtag.Link;
                                        if (!fileLink.ToLower().Contains("http"))
                                        {
                                            // NOTE(review): relative attachment links are resolved against
                                            // foshan.gdgpo.gov.cn while detail pages use foshan.gdgpo.com -
                                            // confirm that the differing host is intentional.
                                            fileLink = "http://foshan.gdgpo.gov.cn" + fileAtag.Link;
                                        }
                                        base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                                    }
                                }
                            }
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
コード例 #30
0
ファイル: InviteHzHengLi.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Crawls the paged notice table at <c>SiteUrl</c> and opens the detail page of every
        /// row that carries a link. Depending on the detail-page title, the row becomes either
        /// a <c>BidInfo</c> (title contains "中标", "成交" or "结果") or an <c>InviteInfo</c>.
        /// Attachment links found in the detail content are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the result list holds
        /// <c>MaxCount</c> entries; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> and <c>InviteInfo</c> records. The list is empty when
        /// the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("select"));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    // The text of the last option of the first <select> is used as the page count.
                    SelectTag selTag = sNode[0] as SelectTag;
                    string    temp   = selTag.OptionTags[selTag.OptionTags.Length - 1].ToNodePlainString();
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "taxis_border")));
                if (viewList != null && viewList.Count > 0)
                {
                    TableTag table = viewList[0] as TableTag;
                    if (table == null)
                    {
                        // The matched node is not a table tag; nothing to read on this page.
                        continue;
                    }
                    // Row 0 is skipped (the loop starts at 1).
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        string   prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;
                        ATag     aTag = tr.GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        if (tr.ColumnCount < 4)
                        {
                            // The date is read from the fourth column; a shorter row would
                            // otherwise throw and abort the whole crawl.
                            continue;
                        }
                        prjName   = aTag.LinkText.Trim();
                        InfoUrl   = "http://www.hzhlz.gov.cn/" + aTag.Link;
                        beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex();
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "newsContent")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt = dtl.AsHtml();

                            // Re-scan the same detail page for the title cell; it replaces the
                            // link text taken from the list. Pages without it are skipped.
                            parser.Reset();
                            NodeList nodeName = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "font1")));
                            if (nodeName != null && nodeName.Count > 0)
                            {
                                prjName = nodeName[0].ToNodePlainString().GetReplace(" ").Trim();
                            }
                            else
                            {
                                continue;
                            }

                            // Plain-text content of the notice, computed once and shared by both branches below.
                            string ctx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                            if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果"))
                            {
                                string buildUnit = string.Empty, bidUnit = string.Empty,
                                       bidMoney = string.Empty, code = string.Empty,
                                       endDate = string.Empty, bidType = string.Empty,
                                       specType = string.Empty,
                                       msgType = string.Empty, bidCtx = string.Empty,
                                       prjMgr = string.Empty, otherType = string.Empty;
                                bidCtx = ctx;

                                code      = bidCtx.GetCodeRegex().GetCodeDel();
                                buildUnit = bidCtx.GetBuildRegex();
                                // Drop everything from "招标代理" on, then cut right after the first "公司".
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                bidUnit = bidCtx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.GetRegex("中标候选公司");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                // Amounts above 100000 are divided by 10000 (presumably a unit
                                // conversion - confirm); unparsable values are left as extracted.
                                decimal money;
                                if (decimal.TryParse(bidMoney, out money) && money > 100000)
                                {
                                    bidMoney = (money / 10000).ToString();
                                }
                                msgType  = "广东省惠州市惠城区横沥镇人民政府";
                                specType = "政府采购";
                                bidType  = prjName.GetInviteBidType();
                                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠城区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.hzhlz.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                            else
                            {
                                string code = string.Empty, buildUnit = string.Empty,
                                       prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                       specType = string.Empty, endDate = string.Empty,
                                       remark = string.Empty,
                                       msgType = string.Empty, otherType = string.Empty;

                                inviteCtx  = ctx;
                                inviteType = prjName.GetInviteBidType();

                                code       = inviteCtx.GetCodeRegex().GetCodeDel();
                                buildUnit  = inviteCtx.GetBuildRegex();
                                prjAddress = inviteCtx.GetAddressRegex();
                                // Same clean-up of the extracted unit name as in the bid branch.
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                msgType = "广东省惠州市惠城区横沥镇人民政府";

                                specType = "政府采购";

                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠城区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);

                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.hzhlz.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }