Example #1
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode.AsString();
                    Regex  reg  = new Regex(@",共[^页]+页");
                    string page = reg.Match(temp).Value.Replace(",共", "").Replace("页", "");
                    pageInt = Convert.ToInt32(page);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.ydszj.gov.cn/channel/138006/content?pageNo=" + i.ToString(), Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("style", "height:350px;"))), new HasAttributeFilter("li")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    for (int j = 0; j < nodeList.Count; j++)
                    {
                        string prjType = nodeList[j].ToPlainTextString();
                        if (prjType.Contains("中标") || prjType.Contains("结果"))
                        {
                            string prjName = string.Empty,
                                   buildUnit = string.Empty, bidUnit = string.Empty,
                                   bidMoney = string.Empty, code = string.Empty,
                                   bidDate = string.Empty,
                                   beginDate = string.Empty,
                                   endDate = string.Empty, bidType = string.Empty,
                                   specType = string.Empty, InfoUrl = string.Empty,
                                   msgType = string.Empty, bidCtx = string.Empty,
                                   prjAddress = string.Empty, remark = string.Empty,
                                   prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                            prjName   = nodeList[j].ToPlainTextString().Replace(nodeList[j].ToPlainTextString().GetDateRegex(), "");
                            beginDate = nodeList[j].ToPlainTextString().GetDateRegex();
                            bidType   = prjName.GetInviteBidType();
                            InfoUrl   = "http://www.ydszj.gov.cn" + nodeList[j].GetATagHref();

                            string htlDtl = string.Empty;
                            try
                            {
                                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                                htlDtl = htlDtl.GetJsString();
                            }
                            catch { continue; }
                            parser = new Parser(new Lexer(htlDtl));
                            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news_content")));
                            if (dtlList != null && dtlList.Count > 0)
                            {
                                HtmlTxt    = dtlList.ToHtml();
                                bidCtx     = HtmlTxt.ToCtxString();
                                bidUnit    = bidCtx.GetBidRegex();
                                buildUnit  = bidCtx.GetBuildRegex();
                                prjAddress = bidCtx.GetAddressRegex();
                                code       = bidCtx.GetCodeRegex();
                                bidMoney   = bidCtx.GetMoneyRegex();

                                msgType  = "英德市住房和城乡建设局";
                                specType = "建设工程";
                                BidInfo info = ToolDb.GenBidInfo("广东省", "清远市区", "英德市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                        else
                        {
                            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                                   prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                   specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                                   remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                                   CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                            prjName    = nodeList[j].ToPlainTextString().Replace(nodeList[j].ToPlainTextString().GetDateRegex(), "");
                            beginDate  = nodeList[j].ToPlainTextString().GetDateRegex();
                            inviteType = prjName.GetInviteBidType();
                            InfoUrl    = "http://www.ydszj.gov.cn" + nodeList[j].GetATagHref();
                            string htlDtl = string.Empty;
                            try
                            {
                                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                                htlDtl = htlDtl.GetJsString();
                            }
                            catch { continue; }
                            parser = new Parser(new Lexer(htlDtl));
                            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news_content")));
                            if (dtlList != null && dtlList.Count > 0)
                            {
                                HtmlTxt   = dtlList.ToHtml();
                                inviteCtx = HtmlTxt.ToCtxString();

                                buildUnit  = inviteCtx.GetBuildRegex();
                                code       = inviteCtx.GetCodeRegex();
                                prjAddress = inviteCtx.GetAddressRegex();

                                msgType  = "英德市住房和城乡建设局";
                                specType = "建设工程";
                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "清远市区", "英德市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #2
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).Replace("&nbsp;", "");
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "Body_div")), true), new TagNameFilter("li")));

            if (sNode != null && sNode.Count > 0)
            {
                for (int t = 0; t < sNode.Count; t++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    INode node = sNode[t];
                    ATag  aTag = node.GetATag();
                    prjName   = aTag.GetAttribute("title");
                    beginDate = node.ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://www.tyjzsc.com.cn/" + aTag.Link.GetReplace("./");
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("style", "width:650px;")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        HtmlTxt   = dtlNode.AsHtml();
                        inviteCtx = HtmlTxt.ToCtxString();

                        buildUnit  = inviteCtx.GetBuildRegex();
                        prjAddress = inviteCtx.GetAddressRegex();
                        code       = inviteCtx.GetCodeRegex();
                        msgType    = "太原市建设工程交易中心";
                        specType   = "建设工程";
                        inviteType = prjName.GetInviteBidType();
                        InviteInfo info = ToolDb.GenInviteInfo("山西省", "山西省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);
                        //parser = new Parser(new Lexer(HtmlTxt));
                        //NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        //if (aNode != null && aNode.Count > 0)
                        //{
                        //    for (int k = 0; k < aNode.Count; k++)
                        //    {
                        //        ATag a = aNode[k] as ATag;
                        //        if (a.IsAtagAttach())
                        //        {
                        //            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, a.Link);
                        //            base.AttachList.Add(attach);
                        //        }
                        //    }
                        //}
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Example #3
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string cookiestr       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pager")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    Regex  reg  = new Regex(@"[0-9]+");
                    string temp = reg.Match(pageNode[pageNode.Count - 1].GetATagHref().Replace("&#39;", "")).Value;
                    pageInt = int.Parse(temp);
                }
                catch
                {
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "__VIEWSTATE",
                                       "__EVENTTARGET",
                                       "__EVENTARGUMENT",
                                       "__VIEWSTATEENCRYPTED",
                                       "__EVENTVALIDATION",
                                       "hsa1$DD_LX",
                                       "hsa1$wd",
                                       "pager_input" },
                        new string[] {
                        viewState,
                        "pager",
                        i.ToString(),
                        "",
                        eventValidation,
                        "综合搜索",
                        "",
                        (i - 1).ToString()
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GV1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 1; j < table.RowCount - 1; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, area = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        TableRow tr = table.Rows[j];
                        prjName = tr.Columns[0].ToNodePlainString();
                        if (prjName.Contains("["))
                        {
                            area    = prjName.Replace("[", "【").Replace("]", "】").GetRegexBegEnd("【", "】");
                            prjName = prjName.Replace("[" + area + "]", "");
                        }
                        beginDate = tr.Columns[1].ToPlainTextString();
                        InfoUrl   = "http://www.ycsggzy.cn/" + tr.Columns[0].GetATagHref().Replace("&amp;", "&");
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "Lb_nr")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml().GetReplace("<br>", "<br />");
                            inviteCtx  = HtmlTxt.ToCtxString();
                            code       = inviteCtx.GetCodeRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex().Replace(" ", "");
                            inviteType = prjName.GetInviteBidType();
                            specType   = "建设工程";
                            msgType    = "银川市公共资源交易中心";
                            InviteInfo info = ToolDb.GenInviteInfo("宁夏回族自治区", "宁夏回族自治区及地市", "银川市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #4
0
        public void DealHtml(IList list, string html, bool crawlAll)
        {
            Parser   parserDtl = new Parser(new Lexer(html));
            NodeList aNodes    = parserDtl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));

            if (aNodes != null && aNodes.Count > 0)
            {
                Type     typs  = typeof(ATag);
                TableTag table = aNodes[0] as TableTag;
                for (int t = 1; t < table.RowCount - 1; t++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty,
                           inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, ctx = string.Empty, CreateTime = string.Empty, HtmlTxt = string.Empty;

                    TableRow tr   = table.Rows[t] as TableRow;
                    ATag     aTag = tr.SearchFor(typeof(ATag), true)[0] as ATag;

                    InfoUrl = aTag.Link;
                    prjName = table.Rows[t].Columns[1].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                    endDate = table.Rows[t].Columns[2].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                    string htmlDtl = string.Empty;
                    try
                    {
                        htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                    }
                    catch (Exception ex)
                    {
                        continue;
                    }
                    Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                    htmlDtl = regexHtml.Replace(htmlDtl, "");
                    Parser parserCtx = new Parser(new Lexer(htmlDtl));

                    NodeList ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "printTb lefttable")));
                    if (ctxNode != null && ctxNode.Count > 0)
                    {
                        Parser   parserdiv = new Parser(new Lexer(htmlDtl));
                        NodeList aNodesdiv = parserdiv.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "biuuu_button")));
                        HtmlTxt = ctxNode.AsHtml().Replace(aNodesdiv.AsHtml(), "").Trim();
                        Type     tp        = typeof(ATag);
                        TableTag tabTag    = ctxNode[0] as TableTag;
                        string   startTime = tabTag.Rows[1].Columns[0].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
                        Regex    regex     = new Regex(@"时间:\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}");
                        Match    math      = regex.Match(startTime);
                        beginDate = math.Value.Replace("时间:", "");

                        Regex regexcode = new Regex("(工程编号|项目编号|招标编号):[^\r\n]+[\r\n]{1}");
                        Match match     = regexcode.Match(tabTag.ToPlainTextString());
                        code = match.Value.Substring(match.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                        Regex regexBuildUnit = new Regex("(招标人|建设单位|招标采购单位):[^\r\n]+[\r\n]{1}");
                        Match matchBuildUnit = regexBuildUnit.Match(tabTag.ToPlainTextString());
                        buildUnit = matchBuildUnit.Value.Substring(matchBuildUnit.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                        Regex regexAddress = new Regex("(建设地点|项目地点|工程地点):[^\r\n]+[\r\n]{1}");
                        Match matchAddress = regexAddress.Match(tabTag.ToPlainTextString());
                        prjAddress = matchAddress.Value.Substring(matchAddress.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();

                        ctx = tabTag.Rows[2].Columns[0].ToPlainTextString().Replace("&nbsp;", " ").Replace("\r\n\r\n\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                        if (ctx.Length > 0)
                        {
                            Regex regexCtx = new Regex("<!--[^<]+-->");
                            ctx = regexCtx.Replace(ctx, "");
                        }

                        if (Encoding.Default.GetByteCount(code) > 50)
                        {
                            code = "";
                        }
                        if (buildUnit == "" || buildUnit == null)
                        {
                            buildUnit = "";
                        }
                        if (Encoding.Default.GetByteCount(buildUnit) > 150)
                        {
                            buildUnit = buildUnit.Substring(0, 150);
                        }
                        if (Encoding.Default.GetByteCount(prjAddress) > 200)
                        {
                            prjAddress = "见招标公告内容";
                        }
                        if (beginDate.Length > 0 && endDate.Length > 0)
                        {
                            DateTime begin = new DateTime();
                            DateTime end   = new DateTime();
                            try
                            {
                                begin = DateTime.Parse(beginDate);
                                end   = DateTime.Parse(endDate);
                            }
                            catch (Exception)
                            {
                            }
                            if (begin > end)
                            {
                                endDate = string.Empty;
                            }
                        }
                    }

                    parserCtx.Reset();

                    ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "toptd_bai")));
                    Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
                    beginDate = regDate.Match(ctxNode.AsString()).Value.Trim();
                    if (beginDate == "")
                    {
                        beginDate = string.Empty;
                    }
                    inviteType = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠阳区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, "惠州市建设工程交易中心", inviteType, "建设工程", string.Empty, InfoUrl, HtmlTxt);
                    list.Add(info);
                    ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("target", "_blank")));
                    NodeList aTagNodes = ctxNode.SearchFor(typeof(ATag), true);
                    for (int a = 0; a < aTagNodes.Count; a++)
                    {
                        ATag fileTage = aTagNodes[a] as ATag;
                        if (fileTage.Link.Contains("http://www.ebc.huizhou.gov.cn/index/loadNewsFile"))
                        {
                            string     downloadURL = fileTage.Link;
                            BaseAttach attach      = ToolDb.GenBaseAttach(fileTage.ToPlainTextString(), info.Id, downloadURL);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return;
                    }
                }
            }
        }
Example #5
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder2_lblSumPage")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString();
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        "ctl00$ContentPlaceHolder2$lnkBtnNext",
                        "",
                        viewState,
                        "96852609",
                        eventValidation
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "695")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount - 1; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.GetATag();
                        prjName   = aTag.LinkText.Trim();
                        beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();

                        InfoUrl = "http://js.panyu.gov.cn/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_txtContent")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.AsHtml();//.Replace("<br", "\r\n<br");
                            inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                            code      = inviteCtx.GetCodeRegex().GetCodeDel();
                            buildUnit = inviteCtx.GetBuildRegex();
                            if (buildUnit.Contains("招标代理"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                            }
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }

                            prjAddress = inviteCtx.GetAddressRegex();
                            inviteType = prjName.GetInviteBidType();
                            msgType    = "广州市番禺区住房和建设局";
                            specType   = "政府采购";
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "番禺区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int k = 0; k < fileNode.Count; k++)
                                {
                                    ATag fileAtag = fileNode[k].GetATag();
                                    if (fileAtag.IsAtagAttach())
                                    {
                                        string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                                        string fileLink = fileAtag.Link;
                                        if (!fileLink.ToLower().Contains("http"))
                                        {
                                            fileLink = "http://js.panyu.gov.cn/" + fileAtag.Link;
                                        }
                                        base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                                    }
                                }
                            }

                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #6
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();
            //取得页码
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "AspNetPager1")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                string htlPage = tdNodes.ToHtml();
                parser = new Parser(new Lexer(htlPage));
                NodeFilter filer    = new TagNameFilter("a");
                NodeList   pageList = parser.ExtractAllNodesThatMatch(filer);
                if (pageList != null && pageList.Count > 0)
                {
                    for (int i = pageList.Count - 1; i >= 0; i--)
                    {
                        try
                        {
                            ATag   aTag     = pageList.SearchFor(typeof(ATag), true)[i] as ATag;
                            string pageTemp = aTag.Link.Replace("main.aspx?flg=3&id=6&page=", "");
                            pageInt = int.Parse(pageTemp);
                            break;
                        }
                        catch (Exception ex) { }
                    }
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&page=" + i.ToString()), Encoding.UTF8);
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "760")));

                if (nodeList != null && nodeList.Count > 0)
                {
                    TableTag table = nodeList[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j];

                        beginDate = tr.Columns[2].ToPlainTextString().Trim();
                        ATag aTag = tr.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                        prjName = aTag.LinkText;
                        InfoUrl = "http://www.uho.cn/" + aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "").Trim();
                            Parser   dtlparserHTML = new Parser(new Lexer(htmldetail));
                            NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("bordercolor", "#FFFFFF"), new TagNameFilter("table")));
                            HtmlTxt    = dtnodeHTML.AsHtml();
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).ToLower().Replace("&nbsp;", "").Replace("</br>", "\r\n").Replace("<br>", "\r\n").Replace("<br/>", "\r\n");
                            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                            htmldetail = regexHtml.Replace(htmldetail, "");
                        }
                        catch (Exception ex) { continue; }
                        Parser   dtlparser = new Parser(new Lexer(htmldetail));
                        NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("bordercolor", "#ffffff"), new TagNameFilter("table")));
                        inviteCtx  = dtnode.AsString();
                        specType   = "其他";
                        msgType    = "深圳市友和保险经纪有限公司";
                        inviteType = ToolHtml.GetInviteTypes(prjName);
                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);

                        if (!crawlAll && list.Count >= this.MaxCount)
                        {
                            return(list);
                        }
                    }
                }
            }
            return(list);
        }
Example #7
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "content clearfix")), true), new TagNameFilter("script")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string page = sNode.ToString().Replace("createPageHTML(", "").Replace(",", "kd").Replace("****", "").Replace("\n", "");
                    page    = page.GetRegexBegEnd("Code", "kd");
                    pageInt = int.Parse(page);
                    //80CGBMPRINKJACJ
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://sgjd.baoan.gov.cn/zbcg/zbgg_139207/index_" + (i - 1) + ".html", Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "content clearfix")), true), new TagNameFilter("li")));
                if (viewList != null && viewList.Count > 0)
                {
                    for (int j = 0; j < viewList.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;


                        ATag aTag = viewList[j].GetATag();
                        prjName   = aTag.GetAttribute("title");
                        beginDate = viewList[j].ToPlainTextString().GetDateRegex();
                        InfoUrl   = aTag.Link;
                        InfoUrl   = InfoUrl.GetRegexBegEnd("./", ".html");
                        InfoUrl   = "http://sgjd.baoan.gov.cn/zbcg/zbgg_139207/" + InfoUrl + ".html";
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "con")));
                        if (dtlList != null && dtlList.Count > 0)
                        {
                            HtmlTxt    = dtlList.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            inviteType = prjName.GetInviteBidType();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            if (prjAddress.Contains("电话"))
                            {
                                prjAddress = prjAddress.Remove(prjAddress.IndexOf("电话"));
                            }
                            code = inviteCtx.GetCodeRegex().GetCodeDel();
                            if (code.Contains("》"))
                            {
                                code = code.Remove(code.IndexOf("》"));
                            }
                            if (code.Contains("现对本采购项目的招标事宜公告如下"))
                            {
                                code = code.Replace("现对本采购项目的招标事宜公告如下", "");
                            }
                            specType   = "建设工程";
                            inviteType = "小型工程";
                            msgType    = "深圳市宝安区松岗街道办事处";
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = "深圳市宝安区松岗街道办事处";
                            }
                            inviteType = ToolHtml.GetInviteType(inviteType);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #8
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("select"));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    SelectTag selTag = sNode[0] as SelectTag;
                    string    temp   = selTag.OptionTags[selTag.OptionTags.Length - 1].ToNodePlainString();
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "taxis_border")));
                if (viewList != null && viewList.Count > 0)
                {
                    TableTag table = viewList[0] as TableTag;
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        string   prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;
                        ATag     aTag = tr.GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        prjName   = aTag.LinkText.Trim();
                        InfoUrl   = "http://www.hzhlz.gov.cn/" + aTag.Link;
                        beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex();
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "newsContent")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt = dtl.AsHtml();
                            string ctx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                            parser.Reset();
                            NodeList nodeName = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "font1")));
                            if (nodeName != null && nodeName.Count > 0)
                            {
                                prjName = nodeName[0].ToNodePlainString().GetReplace(" ").Trim();
                            }
                            else
                            {
                                continue;
                            }
                            if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果"))
                            {
                                string buildUnit = string.Empty, bidUnit = string.Empty,
                                       bidMoney = string.Empty, code = string.Empty,
                                       bidDate = string.Empty,
                                       endDate = string.Empty, bidType = string.Empty,
                                       specType = string.Empty,
                                       msgType = string.Empty, bidCtx = string.Empty,
                                       prjAddress = string.Empty, remark = string.Empty,
                                       prjMgr = string.Empty, otherType = string.Empty;
                                bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                                code      = bidCtx.GetCodeRegex().GetCodeDel();
                                buildUnit = bidCtx.GetBuildRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                bidUnit = bidCtx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.GetRegex("中标候选公司");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                try
                                {
                                    if (decimal.Parse(bidMoney) > 100000)
                                    {
                                        bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                    }
                                }
                                catch { }
                                msgType  = "广东省惠州市惠城区横沥镇人民政府";
                                specType = "政府采购";
                                bidType  = prjName.GetInviteBidType();
                                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠城区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.hzhlz.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                            else
                            {
                                string code = string.Empty, buildUnit = string.Empty,
                                       prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                       specType = string.Empty, endDate = string.Empty,
                                       remark = string.Empty, inviteCon = string.Empty,
                                       CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty;

                                inviteCtx  = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
                                inviteType = prjName.GetInviteBidType();


                                code       = inviteCtx.GetCodeRegex().GetCodeDel();
                                buildUnit  = inviteCtx.GetBuildRegex();
                                prjAddress = inviteCtx.GetAddressRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }


                                msgType = "广东省惠州市惠城区横沥镇人民政府";

                                specType = "政府采购";

                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠城区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);

                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.hzhlz.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #9
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser parser = new Parser(new Lexer(html));
            // NodeList pageNode = parser.ExtractAllNodesThatMatch((new TagNameFilter("div")));

            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "ewb-page-items clearfix")));

            if (pageNode != null && pageNode.Count > 0)
            {
                string sa   = pageNode.AsHtml();
                string dd   = sa.Replace("</p>", "\r\n").ToCtxString();
                string temp = dd.GetRegexBegEnd("1/", "\r").GetReplace("\r\n", "");
                pageInt = int.Parse(temp);
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.hyggzy.gov.cn/zfcg/" + i + ".html");
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "ewb-com-item clearfix")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    for (int j = 0; j < nodeList.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        ATag aTag = nodeList[j].GetATag();
                        prjName = aTag.GetAttribute("title");
                        InfoUrl = "http://www.hyggzy.gov.cn" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ewb-list-bd")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.AsHtml();
                            inviteCtx = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();


                            prjAddress = inviteCtx.GetRegex("公开摇珠地点");
                            beginDate  = inviteCtx.GetRegex("发布时间");
                            beginDate  = beginDate.GetDateRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            if (string.IsNullOrWhiteSpace(prjAddress))
                            {
                                prjAddress = inviteCtx.GetAddressRegex();
                            }


                            prjAddress = inviteCtx.GetRegex(new string[] { "开标地点" }).GetCodeDel();
                            if (string.IsNullOrWhiteSpace(prjAddress))
                            {
                                prjAddress = inviteCtx.GetRegex("投标及开标地点");
                            }


                            if (string.IsNullOrWhiteSpace(buildUnit))
                            {
                                buildUnit = inviteCtx.GetRegex("项目单位");
                            }


                            if (string.IsNullOrWhiteSpace(endDate))
                            {
                                endDate = inviteCtx.GetRegexBegEnd("至", "止");
                            }
                            endDate = endDate.GetReplace("年", "-").GetReplace("月", "-").GetReplace("日", "");


                            code = inviteCtx.GetCodeRegex(new string[] { "项目编号" }).GetCodeDel();
                            if (string.IsNullOrWhiteSpace(code))
                            {
                                code = inviteCtx.GetRegexBegEnd("编号:", "采购").GetReplace("\r\n", "");
                            }
                            if (string.IsNullOrWhiteSpace(code))
                            {
                                code = inviteCtx.GetRegexBegEnd("项目(", ")").GetReplace("\r\n", "");
                            }



                            msgType    = "河源市公共资源交易中心";
                            specType   = "政府采购";
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "河源市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.hyggzy.gov.cn" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #10
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "menu")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode.AsString().ToNodeString().Replace("条共", "");
                    Regex  reg  = new Regex(@"条[^页]+页");
                    string page = reg.Match(temp).Value.Replace("条", "").Replace("页", "");
                    pageInt = Convert.ToInt32(page);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + i.ToString(), Encoding.Default);
                    }
                    catch
                    { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "90%")));
                if (nodeList != null && nodeList.Count > 1)
                {
                    TableTag table = nodeList[nodeList.Count - 2] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr      = table.Rows[j];
                        string   prjType = tr.Columns[1].ToNodePlainString();
                        if (prjType.Contains("中标") || prjType.Contains("结果"))
                        {
                            string prjName = string.Empty,
                                   buildUnit = string.Empty, bidUnit = string.Empty,
                                   bidMoney = string.Empty, code = string.Empty,
                                   bidDate = string.Empty,
                                   beginDate = string.Empty,
                                   endDate = string.Empty, bidType = string.Empty,
                                   specType = string.Empty, InfoUrl = string.Empty,
                                   msgType = string.Empty, bidCtx = string.Empty,
                                   prjAddress = string.Empty, remark = string.Empty,
                                   prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                            prjName   = prjType;
                            beginDate = tr.Columns[3].ToNodePlainString();
                            bidType   = prjName.GetInviteBidType();
                            InfoUrl   = "http://net.suixi.gov.cn/com/ghjsj/" + tr.Columns[1].GetATagHref();
                            string htlDtl = string.Empty;
                            try
                            {
                                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                                htlDtl = htlDtl.GetJsString();
                            }
                            catch { continue; }

                            parser = new Parser(new Lexer(htlDtl));
                            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "92%")));
                            if (dtlList != null && dtlList.Count > 0)
                            {
                                HtmlTxt = dtlList.ToHtml();
                                bidCtx  = HtmlTxt.ToCtxString();

                                bidUnit = bidCtx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.ToNodeString().GetRegexBegEnd("评标委员会推荐", "为中标候选");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                                {
                                    bidMoney = bidCtx.ToNodeString().GetRegexBegEnd("中标价", ",");
                                }
                                prjAddress = bidCtx.GetAddressRegex();
                                code       = bidCtx.GetCodeRegex();
                                buildUnit  = bidCtx.GetBuildRegex();
                                msgType    = "遂溪住房和城乡规划建设局";
                                specType   = "建设工程";
                                BidInfo info = ToolDb.GenBidInfo("广东省", "湛江市区", "遂溪县", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                        else
                        {
                            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                                   prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                   specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                                   remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                                   CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                            prjName    = prjType;
                            beginDate  = tr.Columns[3].ToNodePlainString();
                            inviteType = prjName.GetInviteBidType();
                            InfoUrl    = "http://net.suixi.gov.cn/com/ghjsj/" + tr.Columns[1].GetATagHref();
                            string htlDtl = string.Empty;
                            try
                            {
                                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                                htlDtl = htlDtl.GetJsString();
                            }
                            catch { continue; }

                            parser = new Parser(new Lexer(htlDtl));
                            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "92%")));
                            if (dtlList != null && dtlList.Count > 0)
                            {
                                HtmlTxt   = dtlList.ToHtml();
                                inviteCtx = HtmlTxt.ToCtxString();


                                buildUnit  = inviteCtx.GetBuildRegex();
                                code       = inviteCtx.GetCodeRegex();
                                prjAddress = inviteCtx.GetAddressRegex();

                                msgType  = "遂溪住房和城乡规划建设局";
                                specType = "建设工程";
                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "湛江市区", "遂溪县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #11
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <InviteInfo>();

            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }


            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("script"), new HasAttributeFilter("type", "text/javascript")));
            string   b        = pageNode.AsString().GetCtxBr();
            string   c        = b.Replace("('", "徐鑫").Replace("')", "凯德");

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = c.GetRegexBegEnd("徐鑫", "凯德");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i >= 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "fcInfotitle",
                                       "currentPage" },
                        new string[] {
                        "",
                        i.ToString()
                    }
                        );
                    try
                    {
                        string a1 = nvc[0];
                        html = this.ToolWebSite.GetHtmlByUrl("https://www.dgzb.com.cn/ggzy/website/WebPagesManagement/TradeInfo/GovProcurement/findListByPage?fcInfotype=1&openbidbelong=ZJ", nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }
                JavaScriptSerializer        serializer  = new JavaScriptSerializer();
                Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);
                foreach (KeyValuePair <string, object> obj in smsTypeJson)
                {
                    object[] array = (object[])obj.Value;

                    foreach (object arrValue in array)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                        code      = Convert.ToString(dic["fcTendersn"]);
                        prjName   = Convert.ToString(dic["fcInfotitle"]);
                        beginDate = Convert.ToString(dic["fcInfostartdate"]).GetDateRegex("yyyy-MM-dd");

                        string xu = Convert.ToString(dic["publishinfoid"]);
                        InfoUrl = "https://www.dgzb.com.cn/ggzy/website/WebPagesManagement/TradeInfo/GovProcurement/govdetail?publishinfoid=" + xu + "&fcInfotype=1";
                        //fcInfocontent

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = dic["fcInfocontent"].ToString();
                            // htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch
                        {
                            continue;
                        }
                        bool isTable = true;
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                        if (dtlNode == null || dtlNode.Count < 1)
                        {
                            isTable = false;
                            parser  = new Parser(new Lexer(htmldtl));
                            dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                        }
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.ToHtml();
                            if (isTable)
                            {
                                TableTag dtlTable = dtlNode[0] as TableTag;
                                for (int d = 0; d < dtlTable.RowCount; d++)
                                {
                                    try
                                    {
                                        inviteCtx += dtlTable.Rows[d].Columns[0].ToPlainTextString().Replace(":", "").Replace(":", "") + ":";
                                        inviteCtx += dtlTable.Rows[d].Columns[1].ToPlainTextString() + "\r\n";
                                    }
                                    catch { }
                                }
                                if (string.IsNullOrEmpty(inviteCtx))
                                {
                                    inviteCtx = HtmlTxt.ToCtxString();
                                }
                            }
                            else
                            {
                                // inviteCtx = HtmlTxt.ToCtxString();

                                inviteCtx = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();
                            }
                            prjAddress = inviteCtx.GetRegexBegEnd("地点", "十");
                            if (string.IsNullOrEmpty(prjAddress))
                            {
                                prjAddress = inviteCtx.GetAddressRegex();
                            }
                            inviteType = prjName.GetInviteBidType();

                            msgType  = "东莞市政府采购";
                            specType = "政府采购";

                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aTagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aTagNode != null && aTagNode.Count > 0)
                            {
                                for (int k = 0; k < aTagNode.Count; k++)
                                {
                                    ATag aTag = aTagNode[k].GetATag();
                                    if (aTag.IsAtagAttach())
                                    {
                                        string linkurl = aTag.Link;
                                        linkurl = linkurl.Replace("&amp;", "&");
                                        string cc = string.Empty;
                                        string aa = linkurl.GetRegexBegEnd("&", "id");
                                        if (aa == "")
                                        {
                                            cc = linkurl;
                                        }
                                        else
                                        {
                                            cc = linkurl.Replace(aa, "");
                                        }

                                        BaseAttach attach = ToolDb.GenBaseAttach(aTag.LinkText, info.Id, cc);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
                //Parser parser = new Parser(new Lexer(html));
                //NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("script"), new HasAttributeFilter("type", "text/javascript")));
                //if (pageNode != null && pageNode.Count > 0)
                //{
                //    try
                //    {
                //        string temp = pageNode.AsString().GetRegexBegEnd("找到", "条");
                //        int page = int.Parse(temp);
                //        pageInt = page / 30 + 1;
                //    }
                //    catch { }
                //}
                //for (int i = 1; i <= pageInt; i++)
                //{
                //    if (i > 1)
                //    {
                //        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                //            new string[] { "ec_i",
                //                "topicChrList_crd",
                //                "topicChrList_f_a",
                //            "topicChrList_p",
                //            "topicChrList_s_stockProjectCode",
                //            "topicChrList_s_name",
                //            "topicChrList_s_ldate",
                //            "id",
                //            "method",
                //            "__ec_pages",
                //            "topicChrList_rd",
                //            "topicChrList_f_stockProjectCode",
                //            "topicChrList_f_name",
                //            "topicChrList_f_ldate",},
                //            new string[]{
                //            "topicChrList",
                //            "30",
                //            "",
                //            i.ToString(),
                //            "topicChrList_s_stockProjectCode",
                //            "topicChrList_s_name",
                //            "topicChrList_s_ldate",
                //            "1662",
                //            "view",
                //            "1",
                //            "30",
                //            "","",""
                //            }
                //            );
                //        try
                //        {
                //            html = this.ToolWebSite.GetHtmlByUrl("http://dggp.dg.gov.cn/portal/topicView.do?", nvc, Encoding.UTF8);
                //        }
                //        catch
                //        {
                //            continue;
                //        }
                //    }
                //    parser = new Parser(new Lexer(html));
                //    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "topicChrList_table")));
                //    if (listNode != null && listNode.Count > 0)
                //    {
                //        TableTag table = listNode[0] as TableTag;
                //        for (int j = 3; j < table.RowCount; j++)
                //        {
                //            TableRow tr = table.Rows[j];
                //            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;


                //            prjName = tr.Columns[1].ToNodePlainString();
                //            code = tr.Columns[0].ToNodePlainString();
                //            buildUnit = tr.Columns[2].ToNodePlainString();
                //            beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex("yyyy年MM月dd日");
                //            InfoUrl = "http://dggp.dg.gov.cn/portal/documentView.do?method=view&id=" + tr.Columns[1].GetATagHref().Replace("/viewer.do?id=", "");
                //            string htmldtl = string.Empty;
                //            try
                //            {
                //                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                //            }
                //            catch
                //            {
                //                continue;
                //            }
                //            bool isTable = true;
                //            parser = new Parser(new Lexer(htmldtl));
                //            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bulletinContent")));
                //            if (dtlNode == null || dtlNode.Count < 1)
                //            {
                //                isTable = false;
                //                parser.Reset();
                //                dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "container")));
                //            }
                //            if (dtlNode == null || dtlNode.Count < 1)
                //            {
                //                isTable = false;
                //                parser = new Parser(new Lexer(htmldtl));
                //                dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                //            }
                //            if (dtlNode != null && dtlNode.Count > 0)
                //            {
                //                HtmlTxt = dtlNode.ToHtml();
                //                if (isTable)
                //                {
                //                    TableTag dtlTable = dtlNode[0] as TableTag;
                //                    for (int d = 0; d < dtlTable.RowCount; d++)
                //                    {
                //                        try
                //                        {
                //                            inviteCtx += dtlTable.Rows[d].Columns[0].ToPlainTextString().Replace(":", "").Replace(":", "") + ":";
                //                            inviteCtx += dtlTable.Rows[d].Columns[1].ToPlainTextString() + "\r\n";
                //                        }
                //                        catch { }
                //                    }
                //                }
                //                if (string.IsNullOrEmpty(inviteCtx)) inviteCtx = HtmlTxt.ToCtxString();

                //                prjAddress = inviteCtx.GetAddressRegex();
                //                inviteType = prjName.GetInviteBidType();

                //                msgType = "东莞市政府采购";
                //                specType = "政府采购";

                //                InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                //                list.Add(info);
                //                parser = new Parser(new Lexer(HtmlTxt));
                //                NodeList aTagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                //                if (aTagNode != null && aTagNode.Count > 0)
                //                {
                //                    for (int k = 0; k < aTagNode.Count; k++)
                //                    {
                //                        ATag aTag = aTagNode[k].GetATag();
                //                        if (aTag.IsAtagAttach())
                //                        {
                //                            BaseAttach attach = ToolDb.GenBaseAttach(aTag.LinkText, info.Id, "http://dggp.dg.gov.cn" + aTag.Link);
                //                            base.AttachList.Add(attach);
                //                        }
                //                    }
                //                }
                //                if (!crawlAll && list.Count >= this.MaxCount) return list;
                //            }

                //        }
                //    }
                //}
            }
            return(list);
        }
Example #12
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <InviteInfo>();
            Dictionary <string, string> dic = new Dictionary <string, string>();

            dic.Add("工程招标", "gczb/");
            dic.Add("货物招标", "hwzb/");
            dic.Add("服务招标", "fwzb/");
            foreach (string key in dic.Keys)
            {
                int    pageInt = 1, listCount = 0;
                string html            = string.Empty;
                string viewState       = string.Empty;
                string eventValidation = string.Empty;
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + dic[key]);
                }
                catch { continue; }
                Parser   parser   = new Parser(new Lexer(html));
                NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_page_text")));
                if (pageNode != null && pageNode.Count > 0)
                {
                    string temp = pageNode.AsString();
                    try
                    {
                        pageInt = int.Parse(temp.GetRegexBegEnd("HTML", ",").Replace("(", ""));
                    }
                    catch { }
                }
                for (int i = 1; i <= pageInt; i++)
                {
                    if (i > 1)
                    {
                        try
                        {
                            html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + dic[key] + "index_" + (i - 1).ToString() + ".shtml");
                        }
                        catch { continue; }
                    }
                    parser = new Parser(new Lexer(html));
                    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_1_right_list")), true), new TagNameFilter("ul")));
                    if (listNode != null && listNode.Count > 0)
                    {
                        for (int j = 0; j < listNode.Count; j++)
                        {
                            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                                   prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                   specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                                   remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                                   CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                            beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                            ATag aTag = listNode[j].GetATag();
                            prjName = aTag.LinkText;
                            InfoUrl = this.SiteUrl + dic[key] + aTag.Link.Replace("../", "").Replace("./", "").Replace(" ", "");
                            string htmldtl = string.Empty;
                            try
                            {
                                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                            }
                            catch { continue; }
                            parser = new Parser(new Lexer(htmldtl));
                            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "text")));
                            if (dtlNode != null && dtlNode.Count > 0)
                            {
                                HtmlTxt    = dtlNode.AsHtml();
                                inviteCtx  = HtmlTxt.ToCtxString();
                                buildUnit  = inviteCtx.GetBuildRegex();
                                prjAddress = inviteCtx.GetAddressRegex();
                                parser     = new Parser(new Lexer(HtmlTxt));
                                NodeList namNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "text_bt")));
                                if (namNode != null && namNode.Count > 0)
                                {
                                    string temp = prjName;
                                    prjName = namNode[0].ToNodePlainString();
                                    if (!prjName.Contains(temp.Replace(".", "")))
                                    {
                                        prjName = temp;
                                    }
                                }
                                inviteType = key;
                                specType   = "建设工程";
                                msgType    = "天津市发展和改革委员会";
                                InviteInfo info = ToolDb.GenInviteInfo("天津市", "天津市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                listCount++;
                                if (!crawlAll && listCount >= this.MaxCount)
                                {
                                    goto type;
                                }
                            }
                        }
                    }
                }
                type : continue;
            }
            return(list);
        }
Example #13
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lblPageCount")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString();
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION",
                        "gcbh_Text_Box",
                        "gcmc_TextBox",
                        "num_TextBox",
                        "ImageButton3.x",
                        "ImageButton3.y"
                    }, new string[] {
                        "", "", "",
                        viewState,
                        "B0108473",
                        eventValidation,
                        "", "", "",
                        "5", "12"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        prjName   = aTag.LinkText.GetReplace(" ");
                        buildUnit = tr.Columns[3].ToNodePlainString();
                        endDate   = tr.Columns[4].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.bcactc.com/home/gcxx/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "hei_text")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            TableTag dtlTable = dtlNode[0] as TableTag;
                            HtmlTxt = dtlTable.ToHtml();
                            for (int r = 0; r < dtlTable.RowCount; r++)
                            {
                                for (int c = 0; c < dtlTable.Rows[r].ColumnCount; c++)
                                {
                                    if ((c + 1) % 2 == 0)
                                    {
                                        inviteCtx += dtlTable.Rows[r].Columns[c].ToNodePlainString() + "\r\n";
                                    }
                                    else
                                    {
                                        inviteCtx += dtlTable.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:") + ":";
                                    }
                                }
                            }
                            code       = inviteCtx.GetCodeRegex();
                            prjAddress = inviteCtx.GetAddressRegex().GetCodeDel();
                            beginDate  = inviteCtx.GetRegex("登记日期,登记时间").GetDateRegex();
                            msgType    = "北京市建设工程发包承包交易中心";
                            specType   = "建设工程";
                            inviteType = "勘察设计";
                            InviteInfo info = ToolDb.GenInviteInfo("北京市", "北京市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #14
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pagination")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[pageNode.Count - 2].GetATag().Link.Replace("31", "").Replace(".htm", "");
                    pageInt = int.Parse(temp);
                }
                catch {
                    pageInt = 32;
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.topway.com.cn/6/12/default_" + (i - 1).ToString() + ".htm", Encoding.UTF8);
                    }
                    catch {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("id", "newsList")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        prjName   = listNode[j].GetATagValue("title");
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();

                        InfoUrl = "http://www.topway.com.cn/6/12/" + listNode[j].GetATagHref();

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "fwRight float_l marginL_10")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml().Replace("<br>", "\r\n").Replace("<br/>", "\r\n").Replace("<br />", "\r\n");
                            inviteCtx  = HtmlTxt.ToCtxString().Replace("&ldquo;", "").Replace("&ldquo;", "").Replace("&rdquo;", "");
                            buildUnit  = inviteCtx.GetBuildRegex();
                            code       = inviteCtx.GetCodeRegex();
                            prjAddress = inviteCtx.GetAddressRegex();

                            inviteType = prjName.GetInviteBidType();
                            msgType    = "深圳市天威视讯股份有限公司";
                            specType   = "其他";

                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);

                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #15
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("align", "left")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "Title", "zbr", "xmszd", "zbfwpt", "kbrq", "zblx", "zb", "page"
                    }, new string[] {
                        "", "", "", "", "", "", "1", i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("dt"), new HasAttributeFilter("class", "ny_news")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        ATag aTag = listNode[j].GetATag();
                        prjName = aTag.GetAttribute("title");
                        if (string.IsNullOrWhiteSpace(prjName))
                        {
                            prjName = aTag.LinkText;
                        }
                        InfoUrl   = aTag.Link.Trim();
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                        }
                        catch (Exception)
                        {
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ny_wz")));
                        if (dtnode != null && dtnode.Count > 0)
                        {
                            HtmlTxt    = dtnode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            code       = inviteCtx.GetCodeRegex();

                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            msgType    = "海南省发展和改革委员会";
                            specType   = "建设工程";
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            InviteInfo info = ToolDb.GenInviteInfo("海南省", "海南省及地市", "",
                                                                   string.Empty, code, prjName, prjAddress, buildUnit,
                                                                   beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://ztb.hainan.gov.cn/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }

            return(list);
        }
Example #16
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 3;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                Parser   parser   = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top"))), new TagNameFilter("table")));
                if (viewList != null && viewList.Count > 0)
                {
                    TableTag table = null;
                    for (int l = 0; l < viewList.Count; l++)
                    {
                        TableTag tagTable = viewList[l] as TableTag;
                        if (tagTable.RowCount >= 1)
                        {
                            TableColumn cell = tagTable.Rows[0].Columns[0];
                            if (cell.GetAttribute("width") == "30" && cell.GetAttribute("height") == "25")
                            {
                                table = tagTable;
                            }
                        }
                    }
                    if (table == null)
                    {
                        continue;
                    }
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string   prjName = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        prjName   = aTag.GetAttribute("title");
                        beginDate = table.ToNodePlainString().GetDateRegex();
                        InfoUrl   = "http://www.tanbu.gov.cn/" + aTag.Link;
                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "706")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt = dtl.AsHtml();

                            if (prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果") || prjName.Contains("候选人公示"))
                            {
                                string buildUnit = string.Empty, bidUnit = string.Empty,
                                       bidMoney = string.Empty, code = string.Empty,
                                       bidDate = string.Empty, endDate = string.Empty,
                                       bidType = string.Empty, specType = string.Empty,
                                       msgType = string.Empty, bidCtx = string.Empty,
                                       prjAddress = string.Empty, remark = string.Empty,
                                       prjMgr = string.Empty, otherType = string.Empty;
                                bidCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                                code      = bidCtx.GetCodeRegex().GetCodeDel();
                                buildUnit = bidCtx.GetBuildRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }

                                bidUnit = bidCtx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.GetRegex("中标候选公司,中标候选人");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    parser = new Parser(new Lexer(HtmlTxt));
                                    NodeList bidNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "1")));
                                    if (bidNode != null && bidNode.Count > 0)
                                    {
                                        string   ctx      = string.Empty;
                                        TableTag bidTable = bidNode[0] as TableTag;
                                        if (bidTable.RowCount >= 2 && bidTable.Rows[0].ColumnCount >= 2)
                                        {
                                            ctx  = bidTable.Rows[0].Columns[1].ToNodePlainString() + ":";
                                            ctx += bidTable.Rows[1].Columns[1].ToNodePlainString() + "\r\n";
                                        }
                                        bidUnit = ctx.GetBidRegex(new string[] { "候选人" });
                                    }
                                }
                                try
                                {
                                    if (decimal.Parse(bidMoney) > 100000)
                                    {
                                        bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                    }
                                }
                                catch { }

                                msgType  = "广州市花都区炭步镇人民政府";
                                specType = "政府采购";
                                bidType  = prjName.GetInviteBidType();
                                BidInfo info = ToolDb.GenBidInfo("广东省", "广州政府采购", "花都区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);

                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.tanbu.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                            else
                            {
                                string code = string.Empty, buildUnit = string.Empty,
                                       prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                       specType = string.Empty, endDate = string.Empty,
                                       remark = string.Empty, inviteCon = string.Empty,
                                       CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty;

                                inviteCtx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                                inviteType = prjName.GetInviteBidType();

                                code       = inviteCtx.GetCodeRegex().GetCodeDel();
                                buildUnit  = inviteCtx.GetBuildRegex();
                                prjAddress = inviteCtx.GetAddressRegex();
                                if (buildUnit.Contains("招标代理"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                                }
                                if (buildUnit.Contains("公司"))
                                {
                                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                                }


                                msgType = "广州市花都区炭步镇人民政府";

                                specType = "政府采购";

                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "花都区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);

                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k].GetATag();
                                        if (a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.tanbu.gov.cn/" + a.Link;
                                            }
                                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                            base.AttachList.Add(attach);
                                        }
                                    }
                                }
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #17
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1101;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }

            for (int i = 1; i < pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[]
                                                                                      { "listVo.projectName",
                                                                                        "listVo.serviceType",
                                                                                        "listVo.selectType",
                                                                                        "listVo.simpleProject",
                                                                                        "listVo.parentProject",
                                                                                        "listVo.parentRewardProject",
                                                                                        "listVo.rewardProject",
                                                                                        "listVo.selfProject",
                                                                                        "pageNumber",
                                                                                        "sourtType" },
                                                                                      new string[] {
                        "",
                        "",
                        "",
                        "true",
                        "true",
                        "true",
                        "true",
                        "true",
                        (i - 1).ToString(),
                        ""
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table table-hover table-bordered table-list")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[0].GetATag();

                        prjName   = aTag.LinkText.ToRegString();
                        beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex();
                        buildUnit = tr.Columns[1].ToNodePlainString();
                        InfoUrl   = "http://183.63.34.189" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pannel-cont")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            parser  = new Parser(new Lexer(HtmlTxt));
                            NodeList bidNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            if (bidNode != null && bidNode.Count > 0)
                            {
                                string   ctx      = string.Empty;
                                TableTag bidTable = bidNode[0] as TableTag;
                                for (int r = 0; r < bidTable.RowCount; r++)
                                {
                                    for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = bidTable.Rows[r].Columns[c].ToNodePlainString();
                                        if (c % 2 == 0)
                                        {
                                            ctx += temp + ":";
                                        }
                                        else
                                        {
                                            ctx += temp + "\r\n";
                                        }
                                    }
                                }
                                code       = ctx.GetCodeRegex().GetCodeDel();
                                prjAddress = ctx.GetAddressRegex();
                            }
                            inviteCtx = HtmlTxt.ToCtxString();

                            msgType    = "惠州市中介超市建设领导小组办公室";
                            specType   = "建设工程";
                            inviteType = prjName.GetInviteBidType();
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach() || a.Link.Contains("downloadfile"))
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://183.63.34.189/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #18
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "id1")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    SelectTag tag = sNode[0] as SelectTag;

                    string temp = tag.OptionTags[tag.OptionTags.Length - 1].StringText;
                    pageInt = int.Parse(temp.GetReplace("第,页"));
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "4")));

                if (viewList != null && viewList.Count > 0)
                {
                    TableTag table = viewList[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount == 1)
                        {
                            continue;
                        }
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        ATag aTag = tr.Columns[1].GetATag();
                        beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();

                        prjName = aTag.GetAttribute("title");

                        InfoUrl = "http://liaobu.dg.gov.cn/" + aTag.Link;
                        string htmDtl = string.Empty;
                        try
                        {
                            htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");
                            htmDtl = regexHtml.Replace(htmDtl, "");
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "cont")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt    = dtl.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            inviteType = prjName.GetInviteBidType();
                            prjAddress = inviteCtx.GetAddressRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();


                            msgType    = "东莞市寮步镇政府";
                            specType   = "政府采购";
                            inviteType = prjName.GetInviteBidType();
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "寮步镇", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);

                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k].GetATag();
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://liaobu.dg.gov.cn/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #19
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "NewsPage")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode.AsString().Replace("createPageHTML(", "").Replace("index", "").Replace("html", "").Replace(", 0,", "").Replace(");", "").Replace(",", "").Replace(";", "").Replace(")", "").Replace("\"", "").Replace(" ", "");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.shajing.gov.cn/xxgk_14947/ywxx/zbcg/zbgg/index_" + (i - 1).ToString() + ".html", Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "NewsLiks01Text"))), new TagNameFilter("ul"))), new TagNameFilter("li")));
                if (viewList != null && viewList.Count > 0)
                {
                    for (int j = 0; j < viewList.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
                        beginDate = regDate.Match(viewList[j].ToPlainTextString().Trim()).Value;
                        string temp = viewList[j].ToPlainTextString().Trim().Replace(beginDate, "");
                        try
                        {
                            int beg = temp.IndexOf("else{"), end = temp.Length;
                            temp    = temp.Substring(beg, end - beg);
                            beg     = temp.IndexOf("<a");
                            end     = temp.IndexOf("/a>");
                            temp    = temp.Substring(beg, (end - beg) + 3);
                            beg     = temp.IndexOf(">");
                            end     = temp.IndexOf("</");
                            prjName = temp.Substring(beg + 1, end - beg - 1);
                            Parser   p    = new Parser(new Lexer(temp));
                            NodeList l    = p.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            ATag     aTag = l.SearchFor(typeof(ATag), true)[0] as ATag;
                            InfoUrl = "http://www.shajing.gov.cn/xxgk_14947/ywxx/zbcg/zbgg/" + aTag.Link.Replace("../", "").Replace("./", "");
                        }
                        catch { continue; }
                        string htlDtl = string.Empty, ctx = string.Empty;
                        try
                        {
                            htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                            Regex regexHtml = new Regex(@"<script[^<]*</script>|<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");
                            htlDtl = regexHtml.Replace(htlDtl, "");
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "DivContent")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt   = System.Text.RegularExpressions.Regex.Replace(dtl.AsHtml(), "(<script)[\\s\\S]*?(</script>)", "");
                            inviteCtx = System.Text.RegularExpressions.Regex.Replace(HtmlTxt, "(<script)[\\s\\S]*?(</script>)", "");
                            inviteCtx = System.Text.RegularExpressions.Regex.Replace(inviteCtx, "<[^>]*>", "").Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                            string InvType = prjName;
                            if (InvType.Contains("施工"))
                            {
                                inviteType = "施工";
                            }
                            if (InvType.Contains("监理"))
                            {
                                inviteType = "监理";
                            }
                            if (InvType.Contains("设计"))
                            {
                                inviteType = "设计";
                            }
                            if (InvType.Contains("勘察"))
                            {
                                inviteType = "勘察";
                            }
                            if (InvType.Contains("服务"))
                            {
                                inviteType = "服务";
                            }
                            if (InvType.Contains("劳务分包"))
                            {
                                inviteType = "劳务分包";
                            }
                            if (InvType.Contains("专业分包"))
                            {
                                inviteType = "专业分包";
                            }
                            if (InvType.Contains("小型施工"))
                            {
                                inviteType = "小型工程";
                            }
                            if (InvType.Contains("设备材料"))
                            {
                                inviteType = "设备材料";
                            }
                            Regex regPrjAddr = new Regex(@"(工程位置|工程地点|工程地址|详细地址|地点|地址)(:|:)[^\r\n]+\r\n");
                            prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程位置", "").Replace("工程地点", "").Replace("工程地址", "").Replace("详细地址", "").Replace("地点", "").Replace("地址", "").Replace(":", "").Replace(":", "").Trim();
                            Regex regBuildUnit = new Regex(@"(招标代理机构|招标单位|招标人|招标单位(盖章))(:|:)[^\r\n]+\r\n");
                            buildUnit = regBuildUnit.Match(inviteCtx).Value.Replace("招标代理机构", "").Replace("招标单位", "").Replace("招标人", "").Replace("(盖章)", "").Replace(":", "").Replace(":", "").Trim();
                            Regex regPrjCode = new Regex(@"(工程编号|项目编号|编号)(:|:)[^\r\n]+\r\n");
                            code    = regPrjCode.Match(inviteCtx).Value.Replace("工程编号", "").Replace("项目编号", "").Replace("编号", "").Replace(":", "").Replace(":", "").Trim();
                            msgType = "深圳市宝安区沙井街道办事处";
                            if (string.IsNullOrEmpty(prjAddress) || Encoding.Default.GetByteCount(prjAddress) > 150)
                            {
                                prjAddress = "见招标信息";
                            }
                            code       = ToolHtml.GetSubString(code, 50);
                            buildUnit  = ToolHtml.GetSubString(buildUnit, 150);
                            specType   = "建设工程";
                            inviteType = "小型工程";
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = "深圳市宝安区沙井街道办事处";
                            }
                            inviteType = ToolHtml.GetInviteType(inviteType);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #20
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("总页数", "当前").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    },
                        new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        prjName   = aTag.GetAttribute("title");
                        beginDate = tr.ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.gaxqjyzx.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.AsHtml();
                            inviteCtx = HtmlTxt.ToCtxString();

                            code      = inviteCtx.GetCodeRegex();
                            buildUnit = inviteCtx.GetBuildRegex();
                            if (buildUnit.Contains("招标代理机构"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理机构"));
                            }
                            if (buildUnit.Contains("代理机构"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("代理机构"));
                            }
                            prjAddress = inviteCtx.GetAddressRegex();
                            inviteType = prjName.GetInviteBidType();
                            specType   = "建设工程";
                            msgType    = "贵安新区公共资源交易中心";
                            InviteInfo info = ToolDb.GenInviteInfo("贵州省", "贵州省及地市", "贵安新区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #21
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "p_bar")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.cqzb.gov.cn/class-5-1(" + i + ").aspx");
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ztb_list_right")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty,
                               city = string.Empty;
                        INode node  = listNode[j];
                        ATag  aTag  = node.GetATag();
                        prjName   = aTag.GetAttribute("title");
                        beginDate = node.ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.cqzb.gov.cn/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ztb_zbxx1")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            code       = inviteCtx.GetCodeRegex();
                            specType   = "建设工程";
                            inviteType = prjName.GetInviteBidType();
                            msgType    = "重庆市招标投标综合网";
                            InviteInfo info = ToolDb.GenInviteInfo("重庆市", "重庆市及区县", city, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.cqzb.gov.cn/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #22
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
                Regex regexHtml = new Regex(@"<script[^<]*</script>");
                htl = regexHtml.Replace(htl, "");
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser    = new Parser(new Lexer(htl));
            NodeList nodeList  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "right")));
            Regex    regexPage = new Regex(@"共\d+页");

            try
            {
                page = int.Parse(regexPage.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' }));
            }
            catch (Exception)
            { }
            for (int i = 1; i < page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&otype=&pageNum=" + i.ToString()), Encoding.Default);
                        Regex regexHtml = new Regex(@"<script[^<]*</script>");
                        htl = regexHtml.Replace(htl, "");
                    }
                    catch (Exception ex) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "1")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = (TableTag)tableNodeList[0];
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j];
                        prjName   = tr.Columns[0].ToPlainTextString().Trim();
                        beginDate = tr.Columns[1].ToPlainTextString().Replace("&nbsp; ", "").Trim().Substring(0, 10);
                        ATag aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[0] as ATag;
                        InfoUrl = "http://market.meizhou.gov.cn/website/deptwebsite/1925/Content.jsp?issueId=15488&msgType=00&filePath=" + aTag.GetAttribute("onclick").Replace("showDeptContent('1925','", "");
                        int    ii = InfoUrl.IndexOf("'");
                        string oo = InfoUrl.Remove(ii).Trim();
                        if (oo.Contains("content.php"))
                        {
                            string url = aTag.GetAttribute("onclick").Replace("showDeptContent('1925','", "");
                            ii = url.IndexOf("'");
                            oo = url.Remove(ii).ToString();
                        }
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(oo), Encoding.Default).Replace("&nbsp;", "");
                            Regex regexHtml = new Regex(@"<script[^<]*</script>");
                            htmldetail = regexHtml.Replace(htmldetail, "");
                        }
                        catch (Exception)
                        {
                            Logger.Error("InviteMeiZhoouJS");
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("P"), new HasAttributeFilter("class", "MsoNormal")));
                        if (dtnode == null || dtnode.Count < 1)
                        {
                            parserdetail = new Parser(new Lexer(htmldetail));
                            dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "WordSection1")));
                        }
                        if (dtnode.Count > 0 && dtnode != null)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            for (int k = 0; k < dtnode.Count; k++)
                            {
                                string tr1 = string.Empty;
                                tr1 = dtnode[k].ToPlainTextString().Replace(" ", "").Trim();
                                if (k == 0)
                                {
                                    string InvType = tr1;
                                    inviteType = ToolHtml.GetInviteTypes(InvType);
                                }
                                inviteCtx += tr1 + ":" + "\r\n";
                            }
                            Regex regPrjAddr = new Regex(@"(工程地点|建设地点):[^\r\n]+\r\n");
                            prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("建设地点", "").Replace(":", "").Replace(";", "").Trim();

                            Regex bildUnit = new Regex(@"(招标人|招标人(盖章)|招标人):[^\r\n]+[\r\n]{1}");
                            buildUnit = bildUnit.Match(inviteCtx).Value.Replace("招  标人:", "").Replace("招标人(盖章):", "").Replace("招标人:", "").Trim();
                            if (buildUnit != "" && buildUnit.Contains(":"))
                            {
                                int zz = buildUnit.IndexOf(":");
                                buildUnit = buildUnit.Remove(zz).ToString();
                            }
                            Regex regcode = new Regex(@"(招标项目编号|项目编号)(:|:)[^\r\n]+[\r\n]{1}");
                            code = regcode.Match(inviteCtx).Value.Replace("招标项目编号", "").Replace("项目编号", "").Replace(":", "").Replace(":", "").Trim();

                            Regex  regoType = new Regex(@"工程类型:[^\r\n]+\r\n");
                            string oType    = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                            if (oType.Contains("房建"))
                            {
                                otherType = "房建及工业民用建筑";
                            }
                            else if (oType.Contains("市政"))
                            {
                                otherType = "市政工程";
                            }
                            else if (oType.Contains("园林绿化"))
                            {
                                otherType = "园林绿化工程";
                            }
                            else if (oType.Contains("装饰") || oType.Contains("装修"))
                            {
                                otherType = "装饰装修工程";
                            }
                            else if (oType.Contains("电力"))
                            {
                                otherType = "电力工程";
                            }
                            else if (oType.Contains("水利"))
                            {
                                otherType = "水利工程";
                            }
                            if (oType.Contains("环保"))
                            {
                                otherType = "环保工程";
                            }
                            if (buildUnit == "")
                            {
                                buildUnit = "";
                            }
                            if (buildUnit.Contains("梅州市建设工程交易中心"))
                            {
                                buildUnit = "";
                            }
                            msgType   = "梅州市建设工程交易中心";
                            specType  = "建设工程";
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = ns0 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("?xml:namespaceprefix=o/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("<?xml:namespaceprefix=st1/>", "").Trim();
                            if (Encoding.Default.GetByteCount(code) >= 50)
                            {
                                code = string.Empty;
                            }
                            if (Encoding.Default.GetByteCount(prjAddress) >= 150)
                            {
                                prjAddress = string.Empty;
                            }
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "梅州市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, oo, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #23
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("name", "qPageForm")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    NodeList aNode = new Parser(new Lexer(pageNode.ToHtml())).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        string temp = aNode[aNode.Count - 2].GetATagHref().Replace("turnOverPage", "").Replace("(", "").Replace(")", "").Replace(";", "");
                        pageInt = int.Parse(temp);
                    }
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "channelCode", "pageIndex", "pageSize", "pointPageIndexId" }, new string[] {
                        "0005", i.ToString(), "15", "1"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://yangjiang.gdgpo.com/queryMoreInfoList.do", nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "m_m_c_list")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        ATag aTag = listNode[j].GetATag(1);
                        prjName   = aTag.GetAttribute("title");
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://yangjiang.gdgpo.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zw_c_c_cont")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.AsHtml();//.Replace("<br", "\r\n<br");
                            inviteCtx = HtmlTxt.ToCtxString();
                            code      = inviteCtx.GetCodeRegex().GetCodeDel();

                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            inviteType = prjName.GetInviteBidType();
                            msgType    = "阳江市政府采购";
                            specType   = "政府采购";
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "阳江市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int k = 0; k < fileNode.Count; k++)
                                {
                                    ATag fileAtag = fileNode[k].GetATag();
                                    if (fileAtag.IsAtagAttach())
                                    {
                                        string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                                        string fileLink = fileAtag.Link;
                                        if (!fileLink.ToLower().Contains("http"))
                                        {
                                            fileLink = "http://yangjiang.gdgpo.gov.cn" + fileAtag.Link;
                                        }
                                        base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                                    }
                                }
                            }
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #24
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "yesh fl")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode[0].ToNodePlainString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.szlhxq.gov.cn/mzbsc/zwgk69/cgzb/zbgg282/14843-" + i.ToString() + ".html", Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news1_list")), true), new TagNameFilter("li")));
                if (viewList != null && viewList.Count > 0)
                {
                    for (int j = 0; j < viewList.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        beginDate = viewList[j].ToNodePlainString().GetDateRegex();
                        ATag aTag = viewList[j].GetATag();
                        prjName = aTag.GetAttribute("title");
                        InfoUrl = "http://www.szlhxq.gov.cn" + aTag.Link;
                        string htmDtl = string.Empty;
                        try
                        {
                            htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmDtl));
                        NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tit-content")));
                        if (dtl != null && dtl.Count > 0)
                        {
                            HtmlTxt    = dtl.AsHtml();
                            inviteCtx  = System.Text.RegularExpressions.Regex.Replace(HtmlTxt, "(<script)[\\s\\S]*?(</script>)", "");
                            inviteCtx  = System.Text.RegularExpressions.Regex.Replace(inviteCtx.Replace("<br/>", "\r\n").Replace("<BR/>", "\r\n").Replace("<BR>", "\r\n").Replace("<br>", "\r\n"), "<[^>]*>", "").Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\r\n", "\r\n").Replace("\r\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                            inviteType = prjName.GetInviteBidType();
                            prjAddress = inviteCtx.GetAddressRegex();
                            if (prjAddress.Contains("**"))
                            {
                                prjAddress = string.Empty;
                            }
                            buildUnit = inviteCtx.GetBuildRegex();
                            if (buildUnit.Contains("资质"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("资质"));
                            }
                            code    = inviteCtx.GetCodeRegex().GetCodeDel();
                            msgType = "深圳市龙华新区民治街道办事处";
                            if (string.IsNullOrEmpty(prjAddress))
                            {
                                prjAddress = "见中标信息";
                            }
                            specType   = "建设工程";
                            inviteType = "小型工程";
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = "深圳市龙华新区民治街道办事处";
                            }
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #25
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "wb-page-li")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "\r");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                SiteUrl = "http://www.jxsggzy.cn/web/jyxx/002006/002006001/" + i + ".html";
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
                }
                catch { continue; }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "ewb-list-node clearfix")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;
                        ATag aTag = listNode[j].GetATag();
                        prjName = aTag.GetAttribute("title");
                        if (string.IsNullOrWhiteSpace(prjName))
                        {
                            prjName = aTag.LinkText;
                        }
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                        if (prjName[2].Equals('县') || prjName[2].Equals('区') || prjName[2].Equals('市'))
                        {
                            area = prjName.Substring(0, 3);
                        }
                        string a = aTag.Link;
                        InfoUrl = "http://www.jxsggzy.cn" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-info")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            prjAddress = inviteCtx.GetAddressRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            if (buildUnit.Contains("单位章"))
                            {
                                buildUnit = string.Empty;
                            }
                            if (buildUnit.Contains("联系人"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系人"));
                            }
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();
                            inviteType = "政府采购";
                            specType   = "政府采购";
                            msgType    = "江西省公共资源交易中心";
                            InviteInfo info = ToolDb.GenInviteInfo("江西省", "江西省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #26
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    }, new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;
                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        prjName   = aTag.GetAttribute("title");
                        beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.dyggzyjyzx.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            prjAddress = inviteCtx.GetAddressRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            if (buildUnit.Contains("招标代理"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                            }
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();
                            msgType    = "大冶市公共资源交易中心";
                            specType   = "建设工程";
                            inviteType = prjName.GetInviteBidType();
                            buildUnit  = buildUnit.Replace(" ", "");
                            InviteInfo info = ToolDb.GenInviteInfo("湖北省", "湖北省及地市", "大冶市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.dyggzyjyzx.com/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #27
0
 public async Task <AttendeeInvite> GetInviteAsync(InviteInfo inviteInfo)
 {
     return(await _attendeeRepository.GetInviteAsync(inviteInfo)
            .ConfigureAwait(continueOnCapturedContext: false));
 }
Example #28
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "700")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode.AsString().ToNodeString();
                    Regex  reg  = new Regex(@"/[^下一页]+下一页");
                    string page = reg.Match(temp).Value.Replace("/", "").Replace("下一页", "");
                    pageInt = Convert.ToInt32(page);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.stjs.gov.cn/zbtb/qxzhaobiaonews.asp?page=" + i.ToString(), Encoding.Default);
                    }
                    catch
                    { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "705")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    TableTag table = nodeList[nodeList.Count - 1] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr      = table.Rows[j];
                        string   prjType = tr.Columns[1].ToNodePlainString();
                        if (prjType.Contains("中标") || prjType.Contains("结果"))
                        {
                            string prjName = string.Empty,
                                   buildUnit = string.Empty, bidUnit = string.Empty,
                                   bidMoney = string.Empty, code = string.Empty,
                                   bidDate = string.Empty,
                                   beginDate = string.Empty,
                                   endDate = string.Empty, bidType = string.Empty,
                                   specType = string.Empty, InfoUrl = string.Empty,
                                   msgType = string.Empty, bidCtx = string.Empty,
                                   prjAddress = string.Empty, remark = string.Empty,
                                   prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                            prjName   = prjType.Replace("[中标公示]", "");
                            bidType   = prjName.GetInviteBidType();
                            beginDate = tr.Columns[2].ToNodePlainString().Replace(".", "-");
                            InfoUrl   = "http://www.stjs.gov.cn/" + tr.Columns[1].GetATagHref().Replace("../", "");
                            string htlDtl = string.Empty;
                            try
                            {
                                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                                htlDtl = htlDtl.GetJsString();
                            }
                            catch { continue; }
                            parser = new Parser(new Lexer(htlDtl));
                            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "680")));
                            if (dtlList != null && dtlList.Count > 0)
                            {
                                HtmlTxt = dtlList.ToHtml();
                                bidCtx  = HtmlTxt.ToCtxString();
                                string ctx = string.Empty;
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList tabList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                                if (tabList != null && tabList.Count > 0)
                                {
                                    TableTag tab = tabList[0] as TableTag;
                                    for (int k = 0; k < tab.RowCount; k++)
                                    {
                                        for (int d = 0; d < tab.Rows[k].ColumnCount; d++)
                                        {
                                            if (d % 2 == 0)
                                            {
                                                ctx += tab.Rows[k].Columns[d].ToPlainTextString().Replace(" ", "").Replace("\n", "").Replace("\r", "") + ":";
                                            }
                                            else
                                            {
                                                ctx += tab.Rows[k].Columns[d].ToPlainTextString().Replace(" ", "").Replace("\n", "").Replace("\r", "") + "\r\n";
                                            }
                                        }
                                    }
                                    bidUnit    = ctx.GetBidRegex();
                                    bidMoney   = ctx.GetMoneyRegex();
                                    buildUnit  = ctx.GetBuildRegex();
                                    prjAddress = ctx.GetAddressRegex();
                                    code       = ctx.GetCodeRegex();
                                }
                                else
                                {
                                    bidUnit    = bidCtx.GetBidRegex();
                                    bidMoney   = bidCtx.GetMoneyRegex();
                                    buildUnit  = bidCtx.GetBuildRegex();
                                    prjAddress = bidCtx.GetAddressRegex();
                                    code       = bidCtx.GetCodeRegex();
                                }



                                msgType  = "汕头建设网";
                                specType = "建设工程";
                                BidInfo info = ToolDb.GenBidInfo("广东省", "汕头市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                                 bidMoney, InfoUrl, prjMgr, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                        else
                        {
                            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                                   prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                                   specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                                   remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                                   CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                            prjName    = prjType.Replace("[招标公告]", "");
                            inviteType = prjName.GetInviteBidType();
                            beginDate  = tr.Columns[2].ToNodePlainString().Replace(".", "-");
                            InfoUrl    = "http://www.stjs.gov.cn/" + tr.Columns[1].GetATagHref().Replace("../", "");
                            string htlDtl = string.Empty;
                            try
                            {
                                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                                htlDtl = htlDtl.GetJsString();
                            }
                            catch { continue; }
                            parser = new Parser(new Lexer(htlDtl));
                            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "680")));
                            if (dtlList != null && dtlList.Count > 0)
                            {
                                HtmlTxt   = dtlList.ToHtml();
                                inviteCtx = HtmlTxt.ToCtxString().Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");

                                string ctx = string.Empty;
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList tabList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                                if (tabList != null && tabList.Count > 0)
                                {
                                    TableTag tab = tabList[0] as TableTag;
                                    for (int k = 0; k < tab.RowCount; k++)
                                    {
                                        for (int d = 0; d < tab.Rows[k].ColumnCount; d++)
                                        {
                                            if (d % 2 == 0)
                                            {
                                                ctx += tab.Rows[k].Columns[d].ToPlainTextString().Replace(" ", "").Replace("\r\n", "") + ":";
                                            }
                                            else
                                            {
                                                ctx += tab.Rows[k].Columns[d].ToPlainTextString().Replace(" ", "").Replace("\r\n", "") + "\r\n";
                                            }
                                        }
                                    }
                                    buildUnit  = ctx.GetBuildRegex();
                                    code       = ctx.GetCodeRegex().Replace("/", "");
                                    prjAddress = ctx.GetAddressRegex();
                                }
                                else
                                {
                                    buildUnit  = inviteCtx.GetBuildRegex();
                                    code       = inviteCtx.GetCodeRegex().Replace("/", "");
                                    prjAddress = inviteCtx.GetAddressRegex();
                                }

                                msgType  = "汕头建设网";
                                specType = "建设工程";
                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "汕头市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #29
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "wb-page-items clearfix")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("/", "转到");
                    page = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?pageing=" + i.ToString(), Encoding.UTF8);
                    }
                    catch  { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList node = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "r-items")), true), new TagNameFilter("li")));
                if (node != null && node.Count > 0)
                {
                    for (int j = 0; j < node.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        ATag aTag = node[j].GetATag();
                        InfoUrl   = "http://ggzy.yunfu.gov.cn" + aTag.Link;
                        prjName   = aTag.LinkText;
                        beginDate = node[j].ToPlainTextString().GetDateRegex();
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "").GetJsString();
                        }
                        catch
                        {
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "mainContent")));
                        if (dtnode != null && dtnode.Count > 0)
                        {
                            HtmlTxt   = dtnode.AsHtml();
                            inviteCtx = HtmlTxt.ToCtxString().Replace("&ldquo;", "").Replace("&rdquo;", "");
                            string ctx = inviteCtx;
                            if (string.IsNullOrEmpty(inviteCtx))
                            {
                                ctx       = dtnode.ToHtml().Replace("</span>", "\r\n").ToCtxString().Replace("-->", "").Replace("&nbsp", "").Replace("。", "").Replace("\r", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\r\n\n", "\r\n").Replace("\r\n\n\r\n\n", "\r\n").Replace("\r\n\n\r\n\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Trim();
                                inviteCtx = dtnode.ToHtml().ToCtxString().Replace("-->", "").Replace("&nbsp", "").Replace("。", "").Replace("\r", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\t\r\n\n\t", "\r\n").Replace("\r\n\n\r\n\n", "\r\n").Replace("\r\n\n\r\n\n", "\r\n").Replace("\r\n\n\r\n\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Trim();
                                Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                                inviteCtx = regexHtml.Replace(inviteCtx, "").Replace("(盖章)", "").Replace(" ", "").Trim();
                                ctx       = regexHtml.Replace(ctx, "").Replace("(盖章)", "").Replace(" ", "").Trim();
                            }
                            buildUnit  = ctx.GetBuildRegex();
                            prjAddress = ctx.GetAddressRegex();
                            msgType    = "云浮市建设工程交易中心";
                            specType   = "建设工程";
                            if (buildUnit.Contains("招标代理机构"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理机构")).ToString().Trim();
                            }
                            if (buildUnit.Contains("招标代理"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理")).ToString().Trim();
                            }
                            buildUnit = buildUnit.Replace(":潘先生联系电话:0766--3481216", "").Trim();

                            parserdetail.Reset();
                            NodeList nameNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("h2"), new HasAttributeFilter("class", "post-title")));
                            if (nameNode != null && nameNode.Count > 0)
                            {
                                string tempName = nameNode[0].ToNodePlainString();
                                if (!string.IsNullOrWhiteSpace(tempName))
                                {
                                    prjName = tempName;
                                }
                            }
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "云浮市区", "",
                                                                   string.Empty, code, prjName, prjAddress, buildUnit,
                                                                   beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parserdetail = new Parser(new Lexer(HtmlTxt));
                            NodeList file = parserdetail.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (file != null && file.Count > 0)
                            {
                                for (int d = 0; d < file.Count; d++)
                                {
                                    ATag aFile = file.SearchFor(typeof(ATag), true)[d] as ATag;
                                    if (aFile.IsAtagAttach())
                                    {
                                        string     downloadURL = "http://ggzy.yunfu.gov.cn/" + aFile.Link;
                                        BaseAttach attach      = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, downloadURL);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Example #30
0
        internal void Update(InviteInfo model)
		{
			Update(model as InviteReference);

			if (model.IsRevoked != null)
				IsRevoked = model.IsRevoked.Value;
			if (model.IsTemporary != null)
				IsTemporary = model.IsTemporary.Value;
			if (model.MaxAge != null)
				MaxAge = model.MaxAge.Value;
			if (model.MaxUses != null)
				MaxUses = model.MaxUses.Value;
			if (model.Uses != null)
				Uses = model.Uses.Value;
			if (model.CreatedAt != null)
				CreatedAt = model.CreatedAt.Value;
		}
Example #31
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(base.SiteUrl, Encoding.Default);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("class", "wb-page-default wb-page-number wb-page-family")));

            if (nodeList != null && nodeList.Count > 0)
            {
                try
                {
                    string temp = nodeList.AsString().Replace("1/", "");
                    page = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    //NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[]{
                    //     "theID=","pageNo"}, new string[] { "76", i.ToString() });

                    try
                    {
                        //htl = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                        htl = this.ToolWebSite.GetHtmlByUrl("http://www.jysggzy.com/TPFront/jsgc/004001/?pageing=" + i, Encoding.Default);
                    }
                    catch (Exception ex) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "ewb-data-items ewb-pt6")), true), new TagNameFilter("li")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    for (int j = 0; j < tableNodeList.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        prjName   = tableNodeList[j].ToNodePlainString().Replace(" ", "");
                        beginDate = tableNodeList[j].ToPlainTextString().GetDateRegex();
                        if (!string.IsNullOrEmpty(beginDate))
                        {
                            prjName = prjName.Replace(beginDate, "");
                        }
                        InfoUrl = "http://www.jysggzy.com/" + tableNodeList[j].GetATagHref();
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));

                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            if (buildUnit.Contains("代理"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("代理"));
                            }
                            code = inviteCtx.GetCodeRegex();
                            if (buildUnit.Contains("联系电话"))
                            {
                                buildUnit = string.Empty;
                            }
                            msgType    = "揭阳市建设工程交易中心";
                            inviteType = prjName.GetInviteBidType();
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "揭阳市区", "",
                                                                   string.Empty, code, prjName, prjAddress, buildUnit,
                                                                   beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            //parser = new Parser(new Lexer(HtmlTxt));
                            //NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            //if (fileNode != null && fileNode.Count > 0)
                            //{
                            //    for (int k = 0; k < fileNode.Count; k++)
                            //    {
                            //        ATag fileAtag = fileNode[k].GetATag();
                            //        if (fileAtag.IsAtagAttach())
                            //        {
                            //            try
                            //            {
                            //                string link = string.Empty;
                            //                if (fileAtag.Link.Contains("gdgpo"))
                            //                    link = fileAtag.Link;
                            //                else
                            //                    link = "http://jieyang.gdgpo.com" + fileAtag.Link;
                            //                BaseAttach attach = ToolDb.GenBaseAttach(fileAtag.LinkText, info.Id, link);
                            //                base.AttachList.Add(attach);
                            //            }
                            //            catch { }
                            //        }
                            //    }
                            //}
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }