Beispiel #1
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <BidInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "clearfix")), true), new TagNameFilter("a")));

            if (sNode != null && sNode.Count > 0)
            {
                try
                {
                    string temp = sNode[sNode.Count - 1].GetATag().GetAttribute("onclick").Replace("(", "kdxx").Replace(",", "xxdk");
                    pageInt = int.Parse(temp.GetRegexBegEnd("kdxx", "xxdk"));
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://ps.szzfcg.cn/portal/topicView.do?method=view1&id=2887108&siteId=9&underwayFlag=undefined&tstmp=17%3A48%3A43%20GMT%2B0800&page=" + i, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "fixed")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, bidDate = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, remark = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        TableRow tr = table.Rows[j];
                        beginDate = tr.Columns[1].ToNodePlainString().GetDateRegex("yyyy/MM/dd");
                        ATag aTag = tr.Columns[0].GetATag();
                        prjName = aTag.GetAttribute("title");

                        Regex  regexLink = new Regex(@"id=[^-]+");
                        string id        = regexLink.Match(aTag.Link).Value;
                        InfoUrl = "http://ps.szzfcg.cn/portal/documentView.do?method=view&" + id;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.ToCtxString().Replace("\r\n\r\n\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\t\r\n\t\r\n", "\r\n\t").Replace("\r\n\t\r\n\t\r\n", "\r\n\t").Replace("\r\n\t\r\n\t\r\n", "\r\n\t").Replace("\r\n\t\r\n\t\r\n", "\r\n\t");
                            bool isOk = true;
                            bidCtx = System.Web.HttpUtility.HtmlDecode(bidCtx);
                            while (isOk)
                            {
                                string str = bidCtx.GetRegexBegEnd("&#", ";");
                                if (!string.IsNullOrEmpty(str))
                                {
                                    bidCtx = bidCtx.Replace("&#" + str + ";", "");
                                }
                                else
                                {
                                    isOk = false;
                                }
                            }

                            buildUnit  = bidCtx.GetBuildRegex();
                            prjAddress = bidCtx.GetAddressRegex();
                            bidUnit    = bidCtx.GetBidRegex();
                            bidMoney   = bidCtx.GetMoneyRegex();
                            if (!string.IsNullOrEmpty(bidUnit) && bidMoney == "0")
                            {
                                bidMoney = bidCtx.GetMoneyRegex(null, true, "万元");
                            }
                            if (!string.IsNullOrEmpty(bidUnit) && (string.IsNullOrEmpty(bidMoney) || bidMoney == "0"))
                            {
                                bidMoney = bidCtx.GetMoneyRegex(new string[] { "中标金额", "金额" }, false, "万元");
                            }
                            string ctx = string.Empty;
                            #region 多table匹配
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                parser = new Parser(new Lexer(htmldtl));
                                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "holder")), true), new TagNameFilter("table")));
                                if (dtList != null && dtList.Count > 0)
                                {
                                    for (int c = 0; c < dtList.Count; c++)
                                    {
                                        TableTag tab = dtList[c] as TableTag;
                                        if (IsTableBid(tab))
                                        {
                                            for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                            {
                                                try
                                                {
                                                    ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                    ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                }
                                                catch { }
                                            }
                                            break;
                                        }
                                    }
                                    if (string.IsNullOrEmpty(ctx))
                                    {
                                        if (dtList.Count > 3)
                                        {
                                            TableTag tab = dtList[2] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                    ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                }
                                            }
                                            if (!ctx.Contains("投标供应商") || !ctx.Contains("成交供应商") || !ctx.Contains("中标供应商"))
                                            {
                                                ctx = string.Empty;
                                                tab = dtList[1] as TableTag;
                                                if (tab.RowCount > 1)
                                                {
                                                    for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                    {
                                                        ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                        ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                }
                                            }
                                        }
                                        else if (dtList.Count > 2)
                                        {
                                            TableTag tab = dtList[1] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                    ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                }
                                            }
                                        }
                                        else
                                        {
                                            TableTag tab = dtList[0] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    string start = System.Web.HttpUtility.HtmlDecode(tab.Rows[0].Columns[d].ToNodePlainString());
                                                    string end   = System.Web.HttpUtility.HtmlDecode(tab.Rows[1].Columns[d].ToNodePlainString());
                                                    ctx += start + ":";
                                                    ctx += end + "\r\n";
                                                }
                                            }
                                        }
                                    }
                                    bidUnit  = ctx.GetBidRegex();
                                    bidMoney = ctx.GetMoneyRegex(new string[] { "成交金额" });
                                    if (bidMoney == "" || bidMoney == "0")
                                    {
                                        bidMoney = ctx.GetMoneyRegex();
                                    }
                                    if (!string.IsNullOrEmpty(bidUnit) && bidMoney == "0")
                                    {
                                        string   dtlCtx = string.Empty, unit = string.Empty, money = string.Empty;
                                        TableTag tab = dtList[0] as TableTag;
                                        for (int c = 0; c < tab.RowCount; c++)
                                        {
                                            if ((c + 2) <= tab.RowCount)
                                            {
                                                if (tab.Rows[c].ToNodePlainString().Contains(bidUnit))
                                                {
                                                    for (int d = 0; d < tab.Rows[c].ColumnCount; d++)
                                                    {
                                                        dtlCtx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                        dtlCtx += tab.Rows[c].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                    break;
                                                }
                                            }
                                        }
                                        if (string.IsNullOrEmpty(dtlCtx))
                                        {
                                            Parser   tableParser = new Parser(new Lexer(HtmlTxt));
                                            NodeList tableNode   = tableParser.ExtractAllNodesThatMatch(new TagNameFilter("table"));

                                            if (string.IsNullOrEmpty(dtlCtx) && tableNode.Count > 1)
                                            {
                                                tab = tableNode[1] as TableTag;
                                                for (int c = 0; c < tab.RowCount; c++)
                                                {
                                                    if ((c + 2) <= tab.RowCount)
                                                    {
                                                        if (tab.Rows[c].ToNodePlainString().Contains(bidUnit))
                                                        {
                                                            for (int d = 0; d < tab.Rows[c].ColumnCount; d++)
                                                            {
                                                                dtlCtx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                                dtlCtx += tab.Rows[c].Columns[d].ToNodePlainString() + "\r\n";
                                                            }
                                                            break;
                                                        }
                                                    }
                                                }
                                            }
                                            if (string.IsNullOrEmpty(dtlCtx) && tableNode.Count > 2)
                                            {
                                                tab = tableNode[2] as TableTag;
                                                for (int c = 0; c < tab.RowCount; c++)
                                                {
                                                    if ((c + 2) <= tab.RowCount)
                                                    {
                                                        if (tab.Rows[c].ToNodePlainString().Contains(bidUnit))
                                                        {
                                                            for (int d = 0; d < tab.Rows[c].ColumnCount; d++)
                                                            {
                                                                dtlCtx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                                dtlCtx += tab.Rows[c].Columns[d].ToNodePlainString() + "\r\n";
                                                            }
                                                            break;
                                                        }
                                                    }
                                                }
                                            }
                                            if (string.IsNullOrEmpty(dtlCtx) && tableNode.Count > 3)
                                            {
                                                tab = tableNode[3] as TableTag;
                                                for (int c = 0; c < tab.RowCount; c++)
                                                {
                                                    if ((c + 2) <= tab.RowCount)
                                                    {
                                                        if (tab.Rows[c].ToNodePlainString().Contains(bidUnit))
                                                        {
                                                            for (int d = 0; d < tab.Rows[c].ColumnCount; d++)
                                                            {
                                                                dtlCtx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                                dtlCtx += tab.Rows[c].Columns[d].ToNodePlainString() + "\r\n";
                                                            }
                                                            break;
                                                        }
                                                    }
                                                }
                                            }
                                            if (string.IsNullOrEmpty(dtlCtx) && tableNode.Count > 4)
                                            {
                                                tab = tableNode[4] as TableTag;
                                                for (int c = 0; c < tab.RowCount; c++)
                                                {
                                                    if ((c + 2) <= tab.RowCount)
                                                    {
                                                        if (tab.Rows[c].ToNodePlainString().Contains(bidUnit))
                                                        {
                                                            for (int d = 0; d < tab.Rows[c].ColumnCount; d++)
                                                            {
                                                                dtlCtx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                                dtlCtx += tab.Rows[c].Columns[d].ToNodePlainString() + "\r\n";
                                                            }
                                                            break;
                                                        }
                                                    }
                                                }
                                            }
                                            if (string.IsNullOrEmpty(dtlCtx) && tableNode.Count > 5)
                                            {
                                                tab = tableNode[5] as TableTag;
                                                for (int c = 0; c < tab.RowCount; c++)
                                                {
                                                    if ((c + 2) <= tab.RowCount)
                                                    {
                                                        if (tab.Rows[c].ToNodePlainString().Contains(bidUnit))
                                                        {
                                                            for (int d = 0; d < tab.Rows[c].ColumnCount; d++)
                                                            {
                                                                dtlCtx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                                dtlCtx += tab.Rows[c].Columns[d].ToNodePlainString() + "\r\n";
                                                            }
                                                            break;
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                        unit  = dtlCtx.GetBidRegex();
                                        money = dtlCtx.GetMoneyRegex();
                                        if (bidUnit == unit)
                                        {
                                            bidMoney = money;
                                        }
                                    }
                                    if (bidUnit.Contains("无中标") || bidUnit.Contains("没有"))
                                    {
                                        bidUnit  = "没有中标商";
                                        bidMoney = "0";
                                    }
                                }
                            }
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                parser = new Parser(new Lexer(htmldtl));
                                NodeList dtList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                                if (dtList != null && dtList.Count > 0)
                                {
                                    for (int c = 0; c < dtList.Count; c++)
                                    {
                                        TableTag tab = dtList[c] as TableTag;
                                        if (IsTableBid(tab))
                                        {
                                            for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                            {
                                                try
                                                {
                                                    ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                    ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                }
                                                catch { }
                                            }
                                            break;
                                        }
                                    }
                                    if (string.IsNullOrEmpty(ctx))
                                    {
                                        if (dtList.Count > 3)
                                        {
                                            TableTag tab = dtList[2] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    try
                                                    {
                                                        ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                        ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                    catch { }
                                                }
                                            }
                                        }
                                        else if (dtList.Count > 2)
                                        {
                                            TableTag tab = dtList[1] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    try
                                                    {
                                                        ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                        ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                    catch { }
                                                }
                                            }
                                        }
                                        else if (dtList.Count > 1)
                                        {
                                            TableTag tab = dtList[1] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    try
                                                    {
                                                        ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                        ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                    catch { }
                                                }
                                            }
                                        }
                                        else
                                        {
                                            TableTag tab = dtList[0] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    try
                                                    {
                                                        ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                        ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                    catch { }
                                                }
                                            }
                                        }
                                    }
                                    bidUnit = ctx.GetBidRegex();
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        bidUnit = ctx.GetRegex("中标承包商");
                                    }
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        bidUnit = ctx.GetRegex("中标(成交)供应商");
                                    }
                                    if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                    {
                                        bidMoney = ctx.GetMoneyRegex();
                                    }
                                    if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                    {
                                        bidMoney = bidCtx.GetRegex("中标价").GetMoney();
                                    }
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        if (dtList.Count > 4)
                                        {
                                            TableTag tab = dtList[dtList.Count - 1] as TableTag;
                                            if (tab.RowCount > 1)
                                            {
                                                for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                                                {
                                                    try
                                                    {
                                                        ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                                                        ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                                    }
                                                    catch { }
                                                }
                                            }
                                        }
                                        bidUnit = ctx.GetBidRegex();
                                        if (string.IsNullOrEmpty(bidUnit))
                                        {
                                            bidUnit = ctx.GetRegex("中标承包商");
                                        }
                                        if (string.IsNullOrEmpty(bidUnit))
                                        {
                                            bidUnit = ctx.GetRegex("中标(成交)供应商");
                                        }
                                        if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                        {
                                            bidMoney = ctx.GetMoneyRegex();
                                        }
                                        if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                        {
                                            bidMoney = bidCtx.GetRegex("中标价").GetMoney();
                                        }
                                    }
                                    if (bidUnit.Contains("无中标") || bidUnit.Contains("没有"))
                                    {
                                        bidUnit  = "没有中标商";
                                        bidMoney = "0";
                                    }
                                }
                            }
                            #endregion
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                if (bidCtx.Contains("供应商不足"))
                                {
                                    bidUnit  = "没有中标商";
                                    bidMoney = "0";
                                }
                            }
                            if (bidMoney != "0")
                            {
                                try
                                {
                                    decimal mon = decimal.Parse(bidMoney);
                                    if (mon > 100000)
                                    {
                                        bidMoney = bidMoney.GetMoney();
                                    }
                                }
                                catch { }
                            }
                            bidType = prjName.GetInviteBidType();
                            string[] CodeRegex = { "工程编号", "项目编号", "招标编号", "中标编号" };
                            code = bidCtx.GetCodeRegex(CodeRegex).GetCodeDel();


                            if (string.IsNullOrEmpty(code))
                            {
                                code = bidCtx.Replace(")", "kdxx").Replace(")", "kdxx").GetRegexBegEnd("招标编号", "kdxx").Replace(":", "").Replace(":", "");
                            }
                            if (string.IsNullOrEmpty(code))
                            {
                                code = bidCtx.Replace(")", "kdxx").Replace(")", "kdxx").GetRegexBegEnd("项目编号", "kdxx").Replace(":", "").Replace(":", "");
                            }
                            if (string.IsNullOrEmpty(code))
                            {
                                code = bidCtx.Replace(")", "kdxx").Replace(")", "kdxx").GetRegexBegEnd("工程编号", "kdxx").Replace(":", "").Replace(":", "");
                            }
                            if (string.IsNullOrEmpty(code))
                            {
                                code = bidCtx.Replace(")", "kdxx").Replace(")", "kdxx").GetRegexBegEnd("编号", "kdxx").Replace(":", "").Replace(":", "");
                            }
                            if (Encoding.Default.GetByteCount(code) > 50)
                            {
                                code = string.Empty;
                            }
                            if (!string.IsNullOrEmpty(code))
                            {
                                code = code.GetChina();
                            }
                            bidUnit  = bidUnit.Replace("名称", "");
                            code     = code.Replace("(", "").Replace("(", "").Replace(")", "").Replace(")", "");
                            msgType  = "深圳市坪山新区公共资源交易中心";
                            specType = "政府采购";
                            bidType  = "服务";
                            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳政府采购", "坪山新区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);

                            list.Add(info);

                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aTagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aTagNode != null && aTagNode.Count > 0)
                            {
                                for (int k = 0; k < aTagNode.Count; k++)
                                {
                                    ATag aFile = aTagNode[k].GetATag();
                                    if (aFile.IsAtagAttach() || aFile.Link.ToLower().Contains("down"))
                                    {
                                        string link = string.Empty;
                                        if (aFile.Link.Contains("http"))
                                        {
                                            link = aFile.Link;
                                        }
                                        else
                                        {
                                            link = "http://ps.szzfcg.cn/" + aFile.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #2
0
        /// <summary>
        /// Crawls the paginated announcement list at <c>SiteUrl</c>, opens each entry's detail
        /// page under http://www.zzjs.com.cn, and builds one <c>BidInfo</c> per entry whose
        /// detail table can be located. Attachment links found in the detail HTML are appended
        /// to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the result list holds at least
        /// <c>MaxCount</c> items; when true, every page reported by the pager is walked.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> items. The list is empty when the first page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(list); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "pagination")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "/共" and "页" in the last pager element.
                    string temp = pageNode[pageNode.Count - 1].ToNodePlainString().GetRegexBegEnd("/共", "页");
                    pageInt = int.Parse(temp);
                }
                catch { } // Best effort: if the pager cannot be read, only the first page is crawled.
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested from the same URL with the page index
                    // supplied through the name/value collection.
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "pageindex",
                        "X-Requested-With"
                    }, new string[] {
                        i.ToString(),
                        "XMLHttpRequest"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "left_picinfo_text")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    string prjName = string.Empty,
                           buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty,
                           specType = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, bidCtx = string.Empty,
                           prjAddress = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link has no detail page; skip it instead of
                        // letting a NullReferenceException abort the whole crawl.
                        continue;
                    }

                    prjName   = aTag.LinkText;
                    beginDate = node.ToPlainTextString().GetDateRegex("yyyy年MM月dd日");
                    InfoUrl   = "http://www.zzjs.com.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("colspan", "2")), true), new TagNameFilter("table")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtlNode.AsHtml();
                    TableTag table = dtlNode[0] as TableTag;

                    // Flatten the detail table into "label:value" lines: odd-positioned cells are
                    // followed by ":" and even-positioned cells end the line.
                    for (int r = 0; r < table.RowCount; r++)
                    {
                        for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                        {
                            string temp = table.Rows[r].Columns[c].ToNodePlainString();
                            if (string.IsNullOrWhiteSpace(temp))
                            {
                                continue;
                            }
                            if ((c + 1) % 2 == 0)
                            {
                                bidCtx += temp.GetReplace(":,:") + "\r\n";
                            }
                            else
                            {
                                bidCtx += temp.GetReplace(":,:") + ":";
                            }
                        }
                    }

                    // NOTE(review): prjAddress is extracted but is not passed to GenBidInfo below.
                    prjAddress = bidCtx.GetAddressRegex().GetCodeDel().GetReplace(" ");
                    buildUnit  = bidCtx.GetBuildRegex().GetReplace(" ");

                    // Cut trailing text from the unit name: keep up to the first "公司",
                    // and drop anything from "联系" or "地址" onwards.
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("联系"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    code = bidCtx.GetCodeRegex().GetCodeDel();

                    bidUnit  = bidCtx.GetBidRegex();
                    bidMoney = bidCtx.GetMoneyRegex();
                    msgType  = "郑州市城乡建设委员会";
                    specType = bidType = "建设工程";
                    prjMgr   = bidCtx.GetMgrRegex().GetReplace("/,EndFragment");

                    BidInfo info = ToolDb.GenBidInfo("河南省", "河南省及地市", "郑州市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail HTML.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link = string.Empty;
                            if (a.Link.ToLower().Contains("http"))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.zzjs.com.cn/" + a.Link.GetReplace("../,./");
                            }
                            // Links longer than 500 bytes are skipped.
                            if (Encoding.Default.GetByteCount(link) > 500)
                            {
                                continue;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #3
0
        /// <summary>
        /// Crawls the paged grid (<c>MoreInfoList1_DataGrid1</c>) at <c>SiteUrl</c>, opens each
        /// row's detail page under http://qhzbtb.qhwszwdt.gov.cn, extracts the bid fields from the
        /// <c>TDContent</c> cell and builds one <c>BidInfo</c> per row. Attachment links found in
        /// the detail HTML are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the result list holds at least
        /// <c>MaxCount</c> items; when true, every page reported by the pager is walked.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> items. The list is empty when the first page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List<BidInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "总页数:" and "当" in the pager cell.
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当");
                    pageInt = int.Parse(temp);
                }
                catch { } // Best effort: if the pager cannot be read, only the first page is crawled.
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the view state and event validation
                    // values read from the previously fetched HTML, with the pager control as the
                    // event target and the page number as its argument.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION"
                    },
                        new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString(),
                        eventValidation
                    }
                        );
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjAddress = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;
                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount < 3)
                        {
                            // The title link is read from cell 1 and the date from cell 2; a
                            // shorter row would raise an index error and abort the whole crawl.
                            continue;
                        }
                        ATag aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }
                        try
                        {
                            prjName = aTag.GetAttribute("title");
                        }
                        catch
                        {
                            // Kept as best effort: a row whose title cannot be read is skipped.
                            continue;
                        }
                        beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();

                        InfoUrl = "http://qhzbtb.qhwszwdt.gov.cn" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.ToCtxString();
                            parser  = new Parser(new Lexer(HtmlTxt));
                            NodeList dtlBidNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            if (dtlBidNode != null && dtlBidNode.Count > 0)
                            {
                                // First pass: flatten the first table into "label:value" lines,
                                // pairing cells horizontally (odd cell = label, even cell = value).
                                TableTag bidTable = dtlBidNode[0] as TableTag;
                                string   ctx      = string.Empty;
                                for (int r = 0; r < bidTable.RowCount; r++)
                                {
                                    for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = bidTable.Rows[r].Columns[c].ToNodePlainString();
                                        if (string.IsNullOrEmpty(temp))
                                        {
                                            continue;
                                        }

                                        if ((c + 1) % 2 == 0)
                                        {
                                            ctx += temp + "\r\n";
                                        }
                                        else
                                        {
                                            ctx += temp + ":";
                                        }
                                    }
                                }
                                prjAddress = ctx.GetAddressRegex();
                                buildUnit  = ctx.GetBuildRegex();
                                bidUnit    = ctx.GetBidRegex().GetReplace("第一名,第一");
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("第一名");
                                }
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("第一");
                                }
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("1");
                                }
                                bidMoney = ctx.GetMoneyRegex();
                                prjMgr   = ctx.GetMgrRegex();
                                if (string.IsNullOrEmpty(prjMgr))
                                {
                                    prjMgr = ctx.GetMgrRegex(new string[] { "建造师姓名" });
                                }
                                code = ctx.GetCodeRegex();

                                if (string.IsNullOrEmpty(bidUnit) || bidUnit.Contains("中标价"))
                                {
                                    // Second pass: for rows mentioning "中标人" or "中标价", pair
                                    // each cell with the cell in the same column of the next row;
                                    // all other rows are still paired horizontally.
                                    ctx = string.Empty;
                                    for (int r = 0; r < bidTable.RowCount; r++)
                                    {
                                        string rowName = bidTable.Rows[r].ToNodePlainString();
                                        for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                                        {
                                            if (rowName.Contains("中标人") || rowName.Contains("中标价"))
                                            {
                                                try
                                                {
                                                    ctx += bidTable.Rows[r].Columns[c].ToNodePlainString() + ":";
                                                    ctx += bidTable.Rows[r + 1].Columns[c].ToNodePlainString() + "\r\n";
                                                }
                                                catch { } // The next row or its matching column may not exist.
                                            }
                                            else
                                            {
                                                string temp = bidTable.Rows[r].Columns[c].ToNodePlainString();

                                                if ((c + 1) % 2 == 0)
                                                {
                                                    ctx += temp + "\r\n";
                                                }
                                                else
                                                {
                                                    ctx += temp + ":";
                                                }
                                            }
                                        }
                                    }
                                    if (string.IsNullOrEmpty(buildUnit))
                                    {
                                        buildUnit = ctx.GetBuildRegex();
                                    }
                                    bidUnit = ctx.GetBidRegex().GetReplace("第一名,第一");
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        bidUnit = ctx.GetRegex("第一名");
                                    }
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        bidUnit = ctx.GetRegex("第一");
                                    }
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        bidUnit = ctx.GetRegex("1");
                                    }
                                    if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                    {
                                        bidMoney = ctx.GetMoneyRegex();
                                    }
                                    if (string.IsNullOrEmpty(prjMgr) || prjMgr.IsNumber())
                                    {
                                        prjMgr = ctx.GetMgrRegex();
                                    }
                                    if (string.IsNullOrEmpty(prjMgr) || prjMgr.IsNumber())
                                    {
                                        prjMgr = ctx.GetMgrRegex(new string[] { "建造师姓名" });
                                    }
                                    if (string.IsNullOrEmpty(code))
                                    {
                                        code = ctx.GetCodeRegex();
                                    }
                                }
                            }
                            else
                            {
                                // No table in the detail cell: extract from the plain-text content.
                                prjAddress = bidCtx.GetAddressRegex();
                                buildUnit  = bidCtx.GetBuildRegex();
                                bidUnit    = bidCtx.GetBidRegex().GetReplace("第一名,第一");
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = bidCtx.GetRegex("第一中标排序人");
                                }
                                bidMoney = bidCtx.GetMoneyRegex();
                                prjMgr   = bidCtx.GetMgrRegex();
                                if (string.IsNullOrEmpty(prjMgr))
                                {
                                    prjMgr = bidCtx.GetRegex("注册监理工程师");
                                }
                                code = bidCtx.GetCodeRegex();
                            }

                            // Cut trailing text from both unit names: keep up to the first "公司",
                            // and drop anything from "联系" or "地址" onwards.
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            if (buildUnit.Contains("联系"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                            }
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }

                            if (bidUnit.Contains("公司"))
                            {
                                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                            }
                            if (bidUnit.Contains("联系"))
                            {
                                bidUnit = bidUnit.Remove(bidUnit.IndexOf("联系"));
                            }
                            if (bidUnit.Contains("地址"))
                            {
                                bidUnit = bidUnit.Remove(bidUnit.IndexOf("地址"));
                            }
                            buildUnit = buildUnit.Replace(" ", "");
                            bidUnit   = bidUnit.GetReplace("一标段");
                            // Discard winner values that are numeric or still contain label words.
                            if (bidUnit.IsNumber() || bidUnit.Contains("中标") || bidUnit.Contains("投标") || bidUnit.Contains("合格"))
                            {
                                bidUnit = string.Empty;
                            }
                            code = code.Replace(" ", "");

                            // Amounts above 100000 are divided by 10000; text that does not parse
                            // as a decimal is left unchanged.
                            decimal money;
                            if (decimal.TryParse(bidMoney, out money) && money > 100000)
                            {
                                bidMoney = (money / 10000).ToString();
                            }

                            prjMgr = prjMgr.Replace(" ", "");
                            // Discard manager values that are numeric or still contain label words.
                            if (prjMgr.IsNumber() || prjMgr.Contains("注册") || prjMgr.Contains("中标") || prjMgr.Contains("证书"))
                            {
                                prjMgr = string.Empty;
                            }
                            bidType  = "建设工程";
                            specType = "政府采购";
                            msgType  = "青海省公共资源交易监督管理局";
                            // NOTE(review): prjAddress is extracted above but is not passed to
                            // GenBidInfo, and area is always empty here.
                            BidInfo info = ToolDb.GenBidInfo("青海省", "青海省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);

                            // Collect attachment links from the detail HTML.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a == null || !a.IsAtagAttach())
                                    {
                                        continue;
                                    }

                                    string link = string.Empty;
                                    if (a.Link.ToLower().Contains("http"))
                                    {
                                        link = a.Link;
                                    }
                                    else
                                    {
                                        link = "http://qhzbtb.qhwszwdt.gov.cn/" + a.Link.GetReplace("../,./");
                                    }
                                    // Links longer than 500 bytes are skipped.
                                    if (Encoding.Default.GetByteCount(link) > 500)
                                    {
                                        continue;
                                    }
                                    BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                    base.AttachList.Add(attach);
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #4
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser    = new Parser(new Lexer(htl));
            NodeList nodeList  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("align", "right")));
            Regex    regexPage = new Regex(@"共\d+页");

            try
            {
                page = Convert.ToInt32(regexPage.Match(nodeList[0].ToPlainTextString()).Value.Replace("共", "").Replace("页", "").Trim());
            }
            catch (Exception)
            { }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl("http://bidding.szu.edu.cn/list.asp?page=" + i.ToString(), Encoding.Default);
                    }
                    catch (Exception ex) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("style", "border-collapse: collapse")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = (TableTag)tableNodeList[0];
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, img = string.Empty,
                               HtmlTxt = string.Empty;
                        TableRow tr    = table.Rows[j];

                        string m = tr.ChildrenHTML.ToString();
                        prjName   = tr.Columns[0].ToPlainTextString().Trim().Replace("·", "");
                        beginDate = tr.Columns[0].ToPlainTextString().GetDateRegex();
                        ATag aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[1] as ATag;
                        if (prjName.Contains(")") && prjName.Contains("("))
                        {
                            int leng = prjName.IndexOf("(");
                            code    = prjName.Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                            prjName = prjName.Remove(leng);
                            string l = prjName.GetRegexBegEnd("&nbsp;", "&nbsp;");
                            code    = prjName.GetRegexBegEnd("招标公告", "&nbsp;");
                            prjName = prjName.Replace(l, "").Replace("&nbsp;", "");
                        }
                        else if (prjName.Contains(")") && prjName.Contains("("))
                        {
                            int leng = prjName.IndexOf("(");
                            code = prjName.Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                            string l = prjName.GetRegexBegEnd("&nbsp;", "&nbsp;");
                            code    = prjName.GetRegexBegEnd("招标公告", "&nbsp;");
                            prjName = prjName.Replace(l, "").Replace("&nbsp;", "");
                        }
                        InfoUrl = "http://bidding.szu.edu.cn/" + aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                        }
                        catch (Exception)
                        {
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "0")));
                        if (dtnode != null && dtnode.Count > 0)
                        {
                            HtmlTxt   = dtnode.AsHtml();
                            inviteCtx = HtmlTxt.Replace("<li>", "\r\n").Replace("</li>", "\r\n").ToCtxString().Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                            if (string.IsNullOrEmpty(code))
                            {
                                code = inviteCtx.GetCodeRegex();
                            }
                            buildUnit = inviteCtx.GetBuildRegex();
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = inviteCtx.GetRegex("招标机构名称");
                            }

                            prjAddress = inviteCtx.GetAddressRegex();
                            if (string.IsNullOrEmpty(prjAddress))
                            {
                                prjAddress = inviteCtx.GetRegexBegEnd("开标室", "。");
                            }
                            msgType = "深圳大学";
                            if (inviteType == "设备材料" || inviteType == "小型施工" || inviteType == "专业分包" || inviteType == "劳务分包" || inviteType == "服务" || inviteType == "勘察" || inviteType == "设计" || inviteType == "监理" || inviteType == "施工")
                            {
                                specType = "建设工程";
                            }
                            else
                            {
                                specType = "其他";
                            }
                            if (prjAddress == "")
                            {
                                prjAddress = "见招标信息";
                            }
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "",
                                                                   string.Empty, code, prjName, prjAddress, buildUnit,
                                                                   beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int a = 0; a < aNode.Count; a++)
                                {
                                    ATag aTag1 = aNode[a] as ATag;
                                    if (aTag1.IsAtagAttach())
                                    {
                                        string fileUrl = string.Empty;
                                        if (aTag1.Link.Contains("http"))
                                        {
                                            fileUrl = aTag1.Link;
                                        }
                                        else
                                        {
                                            fileUrl = ToolWeb.UrlEncode("http://bidding.szu.edu.cn/" + aTag1.Link);// System.Web.HttpUtility.UrlEncode( aTag1.Link);
                                        }
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #5
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c>, downloads the detail page of every list
        /// entry and turns it into an <c>InviteInfo</c> record. Attachments found on a detail page
        /// (an embedded iframe document and file links) are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty (never <c>null</c>) when the first list page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first page there is nothing to crawl. Return the empty list,
                // like the sibling crawlers do, instead of null.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "navigation")));

            if (pageNode != null && pageNode.Count > 0)
            {
                // The page count is the text between "总共" and "页", with bracket characters stripped.
                string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("总共", "页").GetReplace("【,】,[,]");
                int    parsedPages;
                // An unparsable or non-positive count keeps the default of a single page, so the
                // page that was already downloaded is still processed.
                if (int.TryParse(temp, out parsedPages) && parsedPages > 0)
                {
                    pageInt = parsedPages;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.UTF8);
                    }
                    catch
                    {
                        // Best effort: a page that cannot be downloaded is skipped.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "slidingList")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    // These fields are not extracted for this site and are stored empty.
                    string code = string.Empty, buildUnit = string.Empty, prjAddress = string.Empty,
                           endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // A list entry without a link has no detail page to fetch.
                        continue;
                    }

                    string prjName = aTag.GetAttribute("title");

                    var    dateSpan  = node.GetSpan();
                    string beginDate = dateSpan == null ? string.Empty : dateSpan.StringText;
                    if (!string.IsNullOrEmpty(beginDate))
                    {
                        if (beginDate.Length >= 8)
                        {
                            // Presumably a compact yyyyMMdd value (verify against the site);
                            // it is rewritten as yyyy-MM-dd.
                            beginDate = beginDate.Substring(0, 4) + "-" + beginDate.Substring(4, 2) + "-" + beginDate.Substring(6, 2);
                        }
                        else
                        {
                            // Too short to slice (Substring would throw and abort the crawl);
                            // fall back to the date lookup the sibling crawlers use.
                            beginDate = node.ToPlainTextString().GetDateRegex();
                        }
                    }

                    string infoUrl = "http://www.gsggzyjy.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                    }
                    catch
                    {
                        // Best effort: an entry whose detail page cannot be downloaded is skipped.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_AnnoGoodsHtml")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt    = dtlNode.AsHtml();
                    string inviteCtx  = htmlTxt.ToCtxString();
                    string msgType    = "甘肃省公共资源交易中心";
                    string specType   = "政府采购";
                    string inviteType = "交通运输工程";

                    InviteInfo info = ToolDb.GenInviteInfo("甘肃省", "甘肃省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, htmlTxt);
                    list.Add(info);

                    // Iframes with id "Iframe" on the full detail page are registered as a
                    // "<project name>.pdf" attachment pointing at the iframe's src.
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList frameNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("Iframe"), new HasAttributeFilter("id", "Iframe")));
                    if (frameNodes != null)
                    {
                        for (int k = 0; k < frameNodes.Count; k++)
                        {
                            IFrameTag frame = frameNodes[k] as IFrameTag;
                            if (frame == null)
                            {
                                // The parser produced a different node type; nothing to read.
                                continue;
                            }

                            string src = frame.GetAttribute("src");
                            if (!string.IsNullOrEmpty(src))
                            {
                                BaseAttach attach = ToolDb.GenBaseAttach(prjName + ".pdf", info.Id, src);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    // File links inside the notice body.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList fileNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNodes != null)
                    {
                        for (int a = 0; a < fileNodes.Count; a++)
                        {
                            ATag fileTag = fileNodes[a] as ATag;
                            if (fileTag == null || !fileTag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not contain "http" are treated as relative to the site
                            // root. The test is case-insensitive, as in the sibling crawlers.
                            string link = fileTag.Link.ToLower().Contains("http")
                                ? fileTag.Link
                                : "http://www.gsggzyjy.cn/" + fileTag.Link;

                            BaseAttach attach = ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, link);
                            // Do not register the same server path twice.
                            if (!base.AttachList.Exists(x => x.AttachServerPath == link))
                            {
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Beispiel #6
0
        /// <summary>
        /// Crawls the paged notice list starting at <c>SiteUrl</c> (further pages are read from
        /// <c>http://www.huajiezn.cn/html/zbgg/index_N.html</c>), downloads each entry's detail page
        /// and turns it into an <c>InviteInfo</c> record. File links found in the notice body are
        /// appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("style", "padding-top:15px;")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "共" and "页", with colons removed.
                    string temp = pageNode.AsString().GetRegexBegEnd("共", "页").Replace(":", "");
                    int    parsedPages;
                    // A non-positive count keeps the default so the page that was already
                    // downloaded is still processed.
                    if (int.TryParse(temp, out parsedPages) && parsedPages > 0)
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: if the pager text cannot be extracted, crawl a single page.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.huajiezn.cn/html/zbgg/index_" + i + ".html");
                    }
                    catch
                    {
                        // Best effort: a page that cannot be downloaded is skipped.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "mainnavb")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    // These fields are not extracted for this site and are stored empty.
                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // A list entry without a link has no detail page to fetch.
                        continue;
                    }

                    string prjName   = aTag.LinkText.ToNodeString();
                    string beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                    string infoUrl   = "http://www.huajiezn.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // Best effort: an entry whose detail page cannot be downloaded is skipped.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "Zoom")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt = dtlNode.AsHtml();

                    // The first <h3> of the detail page, when present, replaces the title
                    // taken from the list entry.
                    parser.Reset();
                    NodeList h3Node = parser.ExtractAllNodesThatMatch(new TagNameFilter("h3"));
                    if (h3Node != null && h3Node.Count > 0)
                    {
                        prjName = h3Node[0].ToNodePlainString();
                    }

                    string inviteCtx  = htmlTxt.ToCtxString();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string buildUnit  = inviteCtx.GetBuildRegex();

                    // Trim text trailing the unit name: keep up to and including the first "公司",
                    // then cut at "地址" and at "招标代理" if they are still present.
                    if (buildUnit.Contains("公司"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                    }
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    if (buildUnit.Contains("招标代理"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                    }

                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    string msgType    = "华杰工程咨询有限公司中南分公司";
                    string specType   = "建设工程";
                    string inviteType = prjName.GetInviteBidType();
                    buildUnit = buildUnit.Replace(" ", "");

                    InviteInfo info = ToolDb.GenInviteInfo("湖北省", "湖北省及地市", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, htmlTxt);
                    list.Add(info);

                    // File links inside the notice body.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList fileNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNodes != null)
                    {
                        for (int k = 0; k < fileNodes.Count; k++)
                        {
                            ATag fileTag = fileNodes[k] as ATag;
                            if (fileTag == null || !fileTag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not contain "http" are treated as relative to the site root.
                            string link = fileTag.Link.ToLower().Contains("http")
                                ? fileTag.Link
                                : "http://www.huajiezn.cn/" + fileTag.Link;

                            BaseAttach attach = ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Beispiel #7
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c> (further pages via the <c>pageNo</c>
        /// query parameter), downloads each entry's detail page and turns it into an
        /// <c>InviteInfo</c> record. File links found in the notice body are appended to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList pageo  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "leftnav")), true), new TagNameFilter("span")));

            if (pageo != null && pageo.Count > 0)
            {
                // The page count is the text between "条" and "页", with slashes removed.
                string pages = pageo.AsString().GetRegexBegEnd("条", "页");
                int    parsedPages;
                // TryParse returns false for null/garbage, and a non-positive count is ignored,
                // so the default of a single page is kept in those cases.
                if (int.TryParse(pages == null ? null : pages.Replace("/", ""), out parsedPages) && parsedPages > 0)
                {
                    pageInt = parsedPages;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&pageNo=" + i, Encoding.UTF8);
                    }
                    catch
                    {
                        // Best effort: a page that cannot be downloaded is skipped.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "div_list")), true), new TagNameFilter("ul")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    // These fields are not extracted for this site and are stored empty.
                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    ATag aTag = nodeList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list entry without a link has no detail page to fetch.
                        continue;
                    }

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = nodeList[j].ToPlainTextString().GetDateRegex();
                    string infoUrl   = "http://zyjy.dayawan.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // Best effort: an entry whose detail page cannot be downloaded is skipped.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "div_view")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt    = dtlNode.AsHtml();
                    string inviteCtx  = htmlTxt.Replace("</p>", "\r\n").ToCtxString();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string buildUnit  = inviteCtx.GetBuildRegex();

                    // A non-blank code is kept only when it ends with '号'; any other match is discarded.
                    string code = inviteCtx.GetCodeRegex().GetCodeDel();
                    if (!string.IsNullOrWhiteSpace(code) && code[code.Length - 1] != '号')
                    {
                        code = "";
                    }

                    string msgType    = "惠州大亚湾经济技术开发区公共资源交易中心";
                    string specType   = "建设工程";
                    string inviteType = ToolHtml.GetInviteTypes(prjName);
                    buildUnit = buildUnit.Replace(" ", "");

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "大亚湾区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, htmlTxt);
                    list.Add(info);

                    // File links inside the notice body.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList fileNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNodes != null)
                    {
                        for (int a = 0; a < fileNodes.Count; a++)
                        {
                            ATag fileTag = fileNodes[a] as ATag;
                            if (fileTag == null || !fileTag.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not contain "http" are treated as relative to the site root.
                            string link = fileTag.Link.ToLower().Contains("http")
                                ? fileTag.Link
                                : "http://zyjy.dayawan.gov.cn/" + fileTag.Link;

                            BaseAttach attach = ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Beispiel #8
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <BidInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                string str = System.Web.HttpUtility.UrlDecode("appid=1&webid=1&path=%2F&columnid=808&sourceContentType=1&unitid=620&webname=%E6%B5%99%E6%B1%9F%E7%9C%81%E5%8F%91%E5%B1%95%E5%92%8C%E6%94%B9%E9%9D%A9%E5%A7%94%E5%91%98%E4%BC%9A&permissiontype=0");
                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                    "col",
                    "appid",
                    "webid",
                    "path",
                    "columnid",
                    "sourceContentType",
                    "unitid",
                    "webname",
                    "permissiontype"
                },
                                                                                  new string[] {
                    "1",
                    "1",
                    "1",
                    "/",
                    "148",
                    "1",
                    "363",
                    "江苏政务服务网",
                    "0"
                });
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
            }
            catch { return(null); }

            try
            {
                string temp = html.GetRegexBegEnd("<totalpage>", "</totalpage>");
                pageInt = int.Parse(temp);
            }
            catch { }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "col",
                        "appid",
                        "webid",
                        "path",
                        "columnid",
                        "sourceContentType",
                        "unitid",
                        "webname",
                        "permissiontype"
                    },
                                                                                      new string[] {
                        "1",
                        "1",
                        "1",
                        "/",
                        "148",
                        "1",
                        "363",
                        "江苏政务服务网",
                        "0"
                    });
                    try
                    {
                        int endrecord   = i * 45;
                        int startrecord = 45 * i - 44;
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.jszwfw.gov.cn/module/jslib/jquery/jpage/dataproxy.jsp?perpage=15&endrecord=" + endrecord + "&startrecord=" + startrecord, nvc);
                    }
                    catch { continue; }
                }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "99%")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string prjName = string.Empty,
                               buildUnit = string.Empty, bidUnit = string.Empty,
                               bidMoney = string.Empty, code = string.Empty,
                               bidDate = string.Empty,
                               beginDate = string.Empty,
                               endDate = string.Empty, bidType = string.Empty,
                               specType = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, bidCtx = string.Empty,
                               prjAddress = string.Empty, remark = string.Empty,
                               prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;
                        TableRow tr   = (listNode[j] as TableTag).Rows[0];
                        ATag     aTag = tr.Columns[1].GetATag();
                        prjName = aTag.GetAttribute("title");
                        if (prjName.Contains(" "))
                        {
                            string[] str = prjName.Split(' ');
                            code    = str[0];
                            prjName = str[1];
                        }
                        else
                        {
                            string str = prjName.GetNotChina();
                            if (str.Length > 2 && prjName.IsNumber())
                            {
                                try
                                {
                                    int index = prjName.IndexOf(str.Substring(0, 2));
                                    code    = prjName.Substring(0, index);
                                    prjName = prjName.Substring(index, prjName.Length - index);
                                }
                                catch { }
                            }
                        }
                        beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.jszwfw.gov.cn" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoom")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.AsHtml();
                            bidCtx    = HtmlTxt.ToLower().GetReplace("</p>,</br>", "\r\n").ToCtxString();
                            buildUnit = bidCtx.GetBuildRegex();
                            if (string.IsNullOrEmpty(code))
                            {
                                code = bidCtx.GetCodeRegex().GetCodeDel();
                            }
                            bidUnit = bidCtx.GetBidRegex().GetReplace("名称");
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegex("第一中标候选单位为,第一名,中标(成交)候选人名称").GetReplace("名称");
                            }
                            bidMoney = bidCtx.GetMoneyRegex();
                            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                            {
                                bidMoney = bidCtx.GetMoneyRegex(null, true);
                            }
                            prjMgr = bidCtx.GetMgrRegex();
                            try
                            {
                                if (decimal.Parse(bidMoney) > 100000)
                                {
                                    bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                                }
                            }
                            catch { }
                            msgType  = "江苏省政务服务管理办公室";
                            specType = "政府采购";
                            bidType  = "建设工程";
                            BidInfo info = ToolDb.GenBidInfo("江苏省", "江苏省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.jszwfw.gov.cn/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #9
0
        /// <summary>
        /// Crawls the notice listing at <c>SiteUrl</c>: reads the rows of the first two
        /// <c>table.font9grey1</c> elements, downloads each notice's detail page and saves the
        /// notice, plus the images and attachment links found in its content, via <c>ToolDb</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops once <c>MaxCount</c> notices have been handed to
        /// <c>ToolDb.SaveEntity</c>; when true, every listed row is processed.
        /// </param>
        /// <returns>Always null; this crawler persists its results itself.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.gzzb.gd.cn";
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                // Without the listing page there is nothing to crawl.
                return null;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "font9grey1")));

            // Only the first two matching tables are read, so both must be present.
            if (nodeList == null || nodeList.Count < 2)
            {
                return null;
            }

            for (int i = 0; i < 2; i++)
            {
                TableTag table = nodeList[i] as TableTag;
                if (table == null)
                {
                    continue;
                }
                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    string infoType    = "办事指南";
                    string infoScorce  = string.Empty;
                    string headName    = tr.Columns[1].ToNodePlainString();
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl     = siteRoot + tr.Columns[1].GetATagHref();
                    string htldtl      = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Detail page could not be downloaded: skip this row, keep crawling.
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "contentDiv")));
                    if (dtlList == null || dtlList.Count < 1)
                    {
                        continue;
                    }

                    string ctxHtml = dtlList.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = MsgTypeCosnt.GuangZhouMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "广州市区", string.Empty, infoCtx, infoType);

                    // Check the limit BEFORE counting this notice, so that MaxCount notices are
                    // actually saved (counting first dropped the last one unsaved).
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return null;
                    }
                    sqlCount++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Save every image embedded in the notice content.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgList != null && imgList.Count > 0)
                    {
                        for (int img = 0; img < imgList.Count; img++)
                        {
                            ImageTag imgTag = imgList[img] as ImageTag;
                            try
                            {
                                string src = imgTag.GetAttribute("src");
                                // Sources that do not contain "http" are treated as site-relative.
                                string url = src.Contains("http") ? src : siteRoot + src;
                                BaseAttach obj = ToolHtml.GetBaseAttach(url, headName, info.Id);
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort: a broken image must not abort the crawl.
                            }
                        }
                    }

                    // Save every link in the notice content that IsAtagAttach() accepts.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int a = 0; a < aNode.Count; a++)
                        {
                            ATag aTag = aNode[a] as ATag;
                            if (!aTag.IsAtagAttach())
                            {
                                continue;
                            }
                            try
                            {
                                string url = aTag.Link.Contains("http") ? aTag.Link : siteRoot + aTag.Link;
                                BaseAttach obj = ToolHtml.GetBaseAttach(url, aTag.LinkText, info.Id);
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort: a broken attachment must not abort the crawl.
                            }
                        }
                    }
                }
            }
            return null;
        }
Beispiel #10
0
        /// <summary>
        /// Crawls the notice listing at <c>SiteUrl</c>: reads the rows of the first
        /// <c>table.lefttable</c> element (skipping its first and last row), downloads each
        /// notice's detail page and saves the notice, plus the images and attachment links found
        /// in its content, via <c>ToolDb</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops once <c>MaxCount</c> notices have been handed to
        /// <c>ToolDb.SaveEntity</c>; when true, every listed row is processed.
        /// </param>
        /// <returns>Always null; this crawler persists its results itself.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the listing page there is nothing to crawl.
                return null;
            }

            // Get the page count from the last anchor inside the "scott" pager div.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList[pageList.Count - 1].GetATagValue().Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }
            // Convert.ToInt32 returns 0 for a null string instead of throwing; never let that
            // (or a parsed "0") skip the first page, which has already been downloaded.
            if (pageInt < 1)
            {
                pageInt = 1;
            }

            // NOTE(review): html is downloaded only once above and the loop body never requests
            // page i, so every iteration re-parses the first page. The paging URL is not visible
            // here - confirm the intended request and fetch it when i > 1.
            for (int i = 1; i <= pageInt; i++)
            {
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                if (nodeList == null || nodeList.Count < 1)
                {
                    continue;
                }
                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Starts at 1 and stops before the last row: the first and last rows are not data rows.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    string infoType    = "办事指南";
                    string infoScorce  = string.Empty;
                    string headName    = tr.Columns[1].ToNodePlainString();
                    string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string infoUrl     = tr.Columns[1].GetATagHref();
                    string htldtl      = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString().Replace("<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />", "");
                    }
                    catch
                    {
                        // Detail page could not be downloaded: skip this row, keep crawling.
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "context_div")));
                    if (dtlList == null || dtlList.Count < 1)
                    {
                        continue;
                    }

                    string ctxHtml = dtlList.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = MsgTypeCosnt.ZhongShanMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "中山市区", string.Empty, infoCtx, infoType);

                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return null;
                    }
                    sqlCount++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Save every image embedded in the notice content.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgList != null && imgList.Count > 0)
                    {
                        for (int img = 0; img < imgList.Count; img++)
                        {
                            ImageTag imgTag = imgList[img] as ImageTag;
                            try
                            {
                                BaseAttach baseInfo = ToolHtml.GetBaseAttachByUrl(imgTag.GetAttribute("src"), headName, info.Id);
                                if (baseInfo != null)
                                {
                                    ToolDb.SaveEntity(baseInfo, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort: a broken image must not abort the whole crawl.
                            }
                        }
                    }

                    // Save every link in the notice content that IsAtagAttach() accepts.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList attachList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (attachList != null && attachList.Count > 0)
                    {
                        for (int a = 0; a < attachList.Count; a++)
                        {
                            ATag aTag = attachList[a] as ATag;
                            if (!aTag.IsAtagAttach())
                            {
                                continue;
                            }
                            try
                            {
                                BaseAttach obj = ToolHtml.GetBaseAttachByUrl(aTag.Link, aTag.LinkText, info.Id);
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort: a broken attachment must not abort the whole crawl.
                            }
                        }
                    }
                }
            }
            return null;
        }
Beispiel #11
0
        /// <summary>
        /// Crawls the paged notice listing starting at <c>SiteUrl</c>: for every page, reads the
        /// <c>li.right-text-li</c> entries, downloads each notice's detail page and saves the
        /// notice plus the attachment links found in its content via <c>ToolDb</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops once <c>MaxCount</c> notices have been handed to
        /// <c>ToolDb.SaveEntity</c>; when true, every listed entry is processed.
        /// </param>
        /// <returns>Always null; this crawler persists its results itself.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://www.hebggzy.cn";
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return null;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "huifont")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is whatever follows the first "/" in the pager text.
                    string temp = pageNode.AsString();
                    pageInt = int.Parse(temp.Substring(temp.IndexOf("/") + 1));
                }
                catch
                {
                    // Pager text not in the expected form: keep the single-page default.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(siteRoot + "/024/024002/" + i + ".html", Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "right-text-li")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // NOTE(review): presumably GetATag yields null for an entry without an
                        // anchor; skipping it avoids a NullReferenceException aborting the crawl.
                        continue;
                    }

                    string infoType    = "通知公告";
                    string infoScorce  = string.Empty;
                    string headName    = aTag.GetAttribute("title");
                    string releaseTime = node.ToPlainTextString().GetDateRegex();
                    string infoUrl     = siteRoot + aTag.Link;
                    string htmldtl     = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // Detail page could not be downloaded: skip this entry, keep crawling.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-main")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    string ctxHtml = dtlNode.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = "河北省公共资源交易中心";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "河北省", "河北省及地市", "", infoCtx, infoType);

                    // Check the limit BEFORE counting this notice, so that MaxCount notices are
                    // actually saved (counting first dropped the last one unsaved).
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return null;
                    }
                    sqlCount++;

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    // Save download links and links that IsAtagAttach() accepts.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count < 1)
                    {
                        continue;
                    }
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k] as ATag;
                        if (!(a.Link.ToLower().Contains("download") || a.IsAtagAttach()))
                        {
                            continue;
                        }

                        string link = string.Empty;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            // Relative link: strip "../" and "./" segments and root it at the site.
                            link = siteRoot + "/" + a.Link.GetReplace("../,./");
                        }
                        // Links longer than 500 bytes are skipped.
                        if (Encoding.Default.GetByteCount(link) > 500)
                        {
                            continue;
                        }
                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                            }
                        }
                        catch
                        {
                            // Best effort: a broken attachment must not abort the crawl.
                        }
                    }
                }
            }
            return null;
        }
Beispiel #12
0
        /// <summary>
        /// Crawls the paged bid-result listing at <c>SiteUrl</c> (an ASP.NET postback pager): for
        /// every page, reads the rows of <c>table#TblOSInfoList1_DataGrid1</c>, downloads each
        /// detail page, extracts bidder, amount and project manager from the company table, and
        /// collects the resulting <c>BidInfo</c> records. Attachment links found in the detail
        /// content are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as the result list holds <c>MaxCount</c> records.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first listing page cannot
        /// be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <BidInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string viewState = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Return the (empty) list rather than null, like the other list-returning crawlers.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "TblOSInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Pager text not in the expected form: keep the single-page default.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page i is requested by posting back the pager event with the view state
                    // of the page downloaded last.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "TblOSInfoList1:KeyWord",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT"
                    }, new string[] {
                        viewState,
                        "",
                        "TblOSInfoList1:Pager",
                        i.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "TblOSInfoList1_DataGrid1")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                for (int j = 0; j < table.RowCount; j++)
                {
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty, specType = string.Empty,
                           InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty,
                           area = string.Empty;

                    TableRow tr   = table.Rows[j];
                    ATag     aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // NOTE(review): presumably GetATag yields null for a row without an
                        // anchor; skipping it avoids a NullReferenceException aborting the crawl.
                        continue;
                    }
                    prjName   = aTag.GetAttribute("title").GetReplace(";");
                    // The code is the text between the first pair of square/full-width brackets.
                    code      = tr.Columns[1].ToNodePlainString().GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                    beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    if (aTag.Link.Contains("http"))
                    {
                        InfoUrl = aTag.Link;
                    }
                    else
                    {
                        InfoUrl = "http://www.zmctc.com/zjgcjy/Notice/" + aTag.Link;
                    }

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        // A replacement character means the page was not UTF-8: retry with the default encoding.
                        if (htmldtl.Contains("�"))
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                    }
                    catch { continue; }

                    // The detail content lives in div#news, div#olds or td#TDContent; try them in that order.
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "news")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        parser.Reset();
                        dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "olds")));
                    }
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        parser.Reset();
                        dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    }
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        HtmlTxt = dtlNode.AsHtml();
                        bidCtx  = HtmlTxt.ToCtxString();

                        // Locate the company table: two known ids first, then any table with rules="all".
                        parser  = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Pbgginfodetailnew1_company")));
                        if (tableNode == null || tableNode.Count < 1)
                        {
                            parser.Reset();
                            tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "PbggInfoDetail1_company")));
                        }
                        if (tableNode == null || tableNode.Count < 1)
                        {
                            parser.Reset();
                            tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("rules", "all")));
                        }
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            TableTag tag = tableNode[0] as TableTag;
                            string   ctx = string.Empty;
                            // Pair each cell of row 0 with the cell below it as "header:value" lines.
                            for (int r = 0; r < tag.Rows[0].ColumnCount; r++)
                            {
                                try
                                {
                                    ctx += tag.Rows[0].Columns[r].ToNodePlainString() + ":";
                                    ctx += tag.Rows[1].Columns[r].ToNodePlainString() + "\r\n";
                                }
                                catch
                                {
                                    // Row 1 is missing or shorter than row 0: keep what was gathered.
                                }
                            }
                            if (!ctx.Contains("否决投标"))
                            {
                                bidUnit = ctx.GetBidRegex();
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("中标候选人");
                                }
                                bidMoney = ctx.GetMoneyRegex();
                                prjMgr   = ctx.GetMgrRegex();
                                if (string.IsNullOrEmpty(prjMgr))
                                {
                                    prjMgr = ctx.GetRegex("项目经理/总监");
                                }
                            }
                        }

                        msgType  = "浙江省公共资源交易中心";
                        specType = "政府采购";
                        bidType  = "建设工程";
                        BidInfo info = ToolDb.GenBidInfo("浙江省", "浙江省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                        list.Add(info);

                        // Record every link in the detail content that IsAtagAttach() accepts.
                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k] as ATag;
                                if (!a.IsAtagAttach())
                                {
                                    continue;
                                }
                                string link = string.Empty;
                                if (a.Link.ToLower().Contains("http"))
                                {
                                    link = a.Link;
                                }
                                else
                                {
                                    link = "http://downc.zmctc.com/" + a.Link;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Beispiel #13
0
        /// <summary>
        /// Crawls the paged bid-result list on www.cqzb.gov.cn and builds one <c>BidInfo</c> for
        /// every list entry whose detail page contains the <c>ztb_zbxx1</c> div.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        /// <remarks>
        /// Attachment links found in each detail block are added to <c>base.AttachList</c>.
        /// </remarks>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "p_bar")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "/" and "页" in the pager table.
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed, only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.cqzb.gov.cn/class-5-45(" + i + ").aspx");
                    }
                    catch { continue; }   // skip a list page that fails to download
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ztb_list_right")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty,
                               beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

                        INode node = listNode[j];
                        ATag  aTag = node.GetATag();
                        if (aTag == null)
                        {
                            // A list item without a link cannot be followed to a detail page.
                            continue;
                        }
                        prjName   = aTag.GetAttribute("title");
                        beginDate = node.ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.cqzb.gov.cn/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        }
                        catch { continue; }   // skip an entry whose detail page fails to download
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ztb_zbxx1")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.ToCtxString();
                            parser  = new Parser(new Lexer(HtmlTxt));
                            NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                TableTag table = tableNode[0] as TableTag;
                                string   ctx   = string.Empty;
                                // Flatten the first table into "cell:cell\r\n" text: odd-positioned cells
                                // are followed by ":", even-positioned cells by a line break.
                                for (int r = 0; r < table.RowCount; r++)
                                {
                                    for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = table.Rows[r].Columns[c].ToNodePlainString();
                                        if ((c + 1) % 2 == 0)
                                        {
                                            ctx += temp + "\r\n";
                                        }
                                        else
                                        {
                                            ctx += temp + ":";
                                        }
                                    }
                                }
                                // Try the bidder extractors in turn while the result still contains "/".
                                bidUnit = ctx.GetRegex("拟中标人");
                                if (bidUnit.Contains("/"))
                                {
                                    bidUnit = ctx.GetBidRegex();
                                }
                                if (bidUnit.Contains("/"))
                                {
                                    bidUnit = ctx.GetRegex("中标人");
                                }
                                bidMoney   = ctx.GetMoneyRegex();
                                buildUnit  = ctx.GetBuildRegex();
                                // NOTE(review): prjAddress is extracted here but is not passed to GenBidInfo below.
                                prjAddress = ctx.GetAddressRegex();
                                prjMgr     = ctx.GetMgrRegex();
                            }
                            try
                            {
                                bidType = prjName.GetInviteBidType();
                            }
                            catch
                            {
                                // Best effort: bidType stays empty when it cannot be derived from the name.
                            }
                            specType = "建设工程";
                            msgType  = "重庆市招标投标综合网";
                            BidInfo info = ToolDb.GenBidInfo("重庆市", "重庆市及区县", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a == null || !a.IsAtagAttach())
                                    {
                                        continue;
                                    }
                                    // Links that do not contain "http" are prefixed with the site root.
                                    string link = string.Empty;
                                    if (a.Link.ToLower().Contains("http"))
                                    {
                                        link = a.Link;
                                    }
                                    else
                                    {
                                        link = "http://www.cqzb.gov.cn/" + a.Link;
                                    }
                                    BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                    base.AttachList.Add(attach);
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #14
0
        /// <summary>
        /// Crawls the single list page at <c>SiteUrl</c> (ggzy.jinan.gov.cn) and builds one
        /// <c>InviteInfo</c> for every list table whose detail page contains the <c>zoom</c> div.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the list page cannot be downloaded.
        /// </returns>
        /// <remarks>
        /// Attachment links found in each detail block are added to <c>base.AttachList</c>.
        /// </remarks>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <InviteInfo>();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(list); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "513")));

            if (listNode != null && listNode.Count > 0)
            {
                for (int j = 0; j < listNode.Count; j++)
                {
                    TableTag tag = listNode[j] as TableTag;
                    if (tag == null)
                    {
                        continue;
                    }
                    // Only tables that carry neither an "align" nor a "style" attribute are processed.
                    string align = tag.GetAttribute("align");
                    string style = tag.GetAttribute("style");
                    if (!string.IsNullOrWhiteSpace(align) ||
                        !string.IsNullOrWhiteSpace(style))
                    {
                        continue;
                    }
                    if (tag.RowCount == 0)
                    {
                        continue;
                    }
                    TableRow tr = tag.Rows[0];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 (link) and 2 (date) are both required below.
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // A row without a link cannot be followed to a detail page.
                        continue;
                    }

                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    prjName   = aTag.GetAttribute("title");
                    beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl   = "http://ggzy.jinan.gov.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }   // skip an entry whose detail page fails to download
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoom")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        HtmlTxt = dtlNode.AsHtml().GetReplace("</p>,<br/>", "\r\n");
                        parser  = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            TableTag table = tableNode[0] as TableTag;
                            // Flatten the first table into "cell:cell\r\n" text, ignoring blank cells:
                            // odd-positioned cells are followed by ":", even-positioned cells by a line break.
                            for (int r = 0; r < table.RowCount; r++)
                            {
                                for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                {
                                    string temp = table.Rows[r].Columns[c].ToNodePlainString();
                                    if (string.IsNullOrWhiteSpace(temp))
                                    {
                                        continue;
                                    }
                                    if ((c + 1) % 2 == 0)
                                    {
                                        inviteCtx += temp.GetReplace(":,:") + "\r\n";
                                    }
                                    else
                                    {
                                        inviteCtx += temp.GetReplace(":,:") + ":";
                                    }
                                }
                            }
                        }
                        prjAddress = inviteCtx.GetAddressRegex().GetCodeDel().GetReplace(" ,&mdash");
                        buildUnit  = inviteCtx.GetBuildRegex().GetReplace(" ");
                        // Trim trailing text from the extracted unit: keep up to and including "公司",
                        // and cut everything from "联系" or "地址" onwards.
                        if (buildUnit.Contains("公司"))
                        {
                            buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                        }
                        if (buildUnit.Contains("联系"))
                        {
                            buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                        }
                        if (buildUnit.Contains("地址"))
                        {
                            buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                        }
                        code       = inviteCtx.GetCodeRegex().GetCodeDel();
                        msgType    = "济南市公共资源交易中心";
                        specType   = "政府采购";
                        inviteType = "建设工程";
                        InviteInfo info = ToolDb.GenInviteInfo("山东省", "山东省及地市", "济南市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);
                        parser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k] as ATag;
                                if (a == null || !a.IsAtagAttach())
                                {
                                    continue;
                                }
                                // Links that do not contain "http" are prefixed with the site root.
                                string link = string.Empty;
                                if (a.Link.ToLower().Contains("http"))
                                {
                                    link = a.Link;
                                }
                                else
                                {
                                    link = "http://ggzy.jinan.gov.cn" + a.Link.GetReplace("../,./");
                                }
                                // Links longer than 500 bytes (default encoding) are not recorded.
                                if (Encoding.Default.GetByteCount(link) > 500)
                                {
                                    continue;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                        if (!crawlAll && list.Count >= this.MaxCount)
                        {
                            return(list);
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #15
0
        /// <summary>
        /// Crawls the paged invitation list at <c>SiteUrl</c> (www.xjztb.net) and builds one
        /// <c>InviteInfo</c> for every list entry whose detail page contains the <c>print1</c> table.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        /// <remarks>
        /// Attachment links found in each detail block are added to <c>base.AttachList</c>.
        /// </remarks>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_Repeater1_ctl16_lblpc")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString();
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the page count cannot be parsed, only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // The "page" query value sent for the i-th page is i - 1.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + (i - 1).ToString(), Encoding.Default);
                    }
                    catch { continue; }   // skip a list page that fails to download
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "slist")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, InfoUrl = string.Empty,
                               msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;
                        INode node = listNode[j];
                        ATag  aTag = node.GetATag();
                        if (aTag == null)
                        {
                            // A list item without a link cannot be followed to a detail page.
                            continue;
                        }
                        prjName   = aTag.GetAttribute("title");
                        // NOTE(review): the current year is prefixed to the span text — presumably the list
                        // shows only month and day; entries from an earlier year would be misdated. Confirm.
                        beginDate = DateTime.Now.Year + "-" + node.GetSpan().StringText.ToNodeString().GetReplace(" ");
                        // The area is the text between the first pair of square/lenticular brackets.
                        area      = node.ToNodePlainString().GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
                        InfoUrl   = "http://www.xjztb.net/Homepage/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }   // skip an entry whose detail page fails to download
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "print1")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            prjAddress = inviteCtx.GetAddressRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            // Keep the unit name up to and including "公司".
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            // Cut everything from "地址" onwards. The label itself is not part of the
                            // unit name, so it must not be appended back.
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            code       = inviteCtx.GetCodeRegex().GetCodeDel();
                            msgType    = "新疆维吾尔自治区建设工程招标投标监督管理办公室";
                            specType   = "建设工程";
                            inviteType = "建设工程";
                            InviteInfo info = ToolDb.GenInviteInfo("新疆维吾尔自治区", "新疆维吾尔自治区及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a == null || !a.IsAtagAttach())
                                    {
                                        continue;
                                    }
                                    // Links that do not contain "http" are prefixed with the site root.
                                    string link = string.Empty;
                                    if (a.Link.ToLower().Contains("http"))
                                    {
                                        link = a.Link;
                                    }
                                    else
                                    {
                                        link = "http://www.xjztb.net/" + a.Link;
                                    }
                                    BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                    base.AttachList.Add(attach);
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #16
0
        /// <summary>
        /// Crawls the paged bid-result list at <c>SiteUrl</c> (ggzy.zhuhai.gov.cn) and builds one
        /// <c>BidInfo</c> for every list entry whose detail page contains the <c>m_r m_r_g</c> div.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first list page cannot be downloaded.
        /// </returns>
        /// <remarks>
        /// Attachment links found in each detail block are added to <c>base.AttachList</c>.
        /// </remarks>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "page")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "共" and "页" in the pager div.
                    string temp = pageNode[0].ToPlainTextString().GetRegexBegEnd("共", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed, only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://ggzy.zhuhai.gov.cn//zbjj/index_" + i + ".htm", Encoding.Default);
                    }
                    catch { continue; }   // skip a list page that fails to download
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "news")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        INode node = listNode[j];
                        ATag  aTag = node.GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }

                        string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjMgr = string.Empty, HtmlTxt = string.Empty;

                        prjName   = aTag.GetAttribute("title");
                        beginDate = node.ToPlainTextString().GetDateRegex();
                        InfoUrl   = aTag.Link;

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }   // skip an entry whose detail page fails to download
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "m_r m_r_g")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.ToCtxString();
                            // Rewrite <th ...> / </th> tags as <td> so header cells are read like ordinary
                            // columns (presumably Columns only exposes td cells — confirm against the parser).
                            // Only the tag openers/closers are rewritten: a blanket Replace("th", "td")
                            // would also corrupt cell text and attribute values that contain "th".
                            string tableHtml = HtmlTxt.ToLower().Replace("</th", "</td").Replace("<th", "<td");
                            parser = new Parser(new Lexer(tableHtml));
                            NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bordertb")));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                string   ctx   = string.Empty;
                                TableTag table = tableNode[0] as TableTag;
                                // Flatten the table into "cell:cell\r\n" text. Row 0 is skipped
                                // (presumably a caption/header row — confirm).
                                for (int r = 1; r < table.RowCount; r++)
                                {
                                    for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = table.Rows[r].Columns[c].ToNodePlainString();
                                        if (c % 2 == 0)
                                        {
                                            ctx += temp + ":";
                                        }
                                        else
                                        {
                                            ctx += temp + "\r\n";
                                        }
                                    }
                                }

                                buildUnit = ctx.GetBuildRegex();
                                code      = ctx.GetCodeRegex().GetCodeDel();
                                bidUnit   = ctx.GetBidRegex();
                                bidMoney  = ctx.GetMoneyRegex();
                                prjMgr    = ctx.GetMgrRegex();
                            }

                            msgType  = "珠海市公共资源交易中心";
                            specType = "建设工程";
                            bidType  = prjName.GetInviteBidType();
                            BidInfo info = ToolDb.GenBidInfo("广东省", "珠海市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, string.Empty, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a == null || !a.IsAtagAttach())
                                    {
                                        continue;
                                    }
                                    // Links that do not contain "http" are prefixed with the site root.
                                    string link = string.Empty;
                                    if (a.Link.ToLower().Contains("http"))
                                    {
                                        link = a.Link;
                                    }
                                    else
                                    {
                                        link = "http://ggzy.zhuhai.gov.cn/" + a.Link;
                                    }
                                    BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                    base.AttachList.Add(attach);
                                }
                            }
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #17
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "mtop pages")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("1/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.gsei.com.cn/index.php/cms/item-list-category-1336-page-" + i + ".shtml", Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "label_ul_b")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        ATag aTag = listNode[j].GetATag();
                        if (aTag == null)
                        {
                            continue;
                        }

                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

                        prjName   = aTag.GetAttribute("title");
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                        InfoUrl   = aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "p8_content_show")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt    = dtlNode.AsHtml();
                            inviteCtx  = HtmlTxt.ToCtxString();
                            prjAddress = inviteCtx.GetAddressRegex();
                            buildUnit  = inviteCtx.GetBuildRegex();
                            if (buildUnit.Contains("公司"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                            }
                            if (buildUnit.Contains("地址"))
                            {
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                            }
                            if (buildUnit.Contains("工程地点") || buildUnit.Contains("武警"))
                            {
                                buildUnit = "";
                            }

                            code       = inviteCtx.GetCodeRegex().GetCodeDel();
                            msgType    = "甘肃省信息中心";
                            specType   = "政府采购";
                            inviteType = "建设工程";
                            InviteInfo info = ToolDb.GenInviteInfo("甘肃省", "甘肃省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k] as ATag;
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.gsei.com.cn/" + a.Link.GetReplace("../,./");
                                        }
                                        if (Encoding.Default.GetByteCount(link) > 500)
                                        {
                                            continue;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #18
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c>, downloads the detail page linked from each
        /// table row and converts it into an <c>InviteInfo</c> record. Attachment links found in
        /// the detail HTML are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected;
        /// when true, every listing page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination page-mar")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The text between "/共" and "页" in the pager is parsed as the page count.
                    string temp = pageNode.AsString().GetRegexBegEnd("/共", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: keep the single-page default when the pager cannot be parsed.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "wsbs-table")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        // The matched node is not a table tag; nothing to iterate on this page.
                        continue;
                    }
                    // Row 0 is skipped (presumably the header row).
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount < 3)
                        {
                            // Columns 1 and 2 are read below; a shorter row would throw and abort the crawl.
                            continue;
                        }
                        prjName = tr.Columns[1].ToNodePlainString();
                        // A trailing "(...)" that is not Chinese text is treated as the project code
                        // and stripped from the name. The emptiness check prevents indexing an empty title.
                        if (!string.IsNullOrEmpty(prjName) && prjName[prjName.Length - 1] == ')')
                        {
                            int staIndex = prjName.LastIndexOf("(");
                            int endIndex = prjName.LastIndexOf(")");
                            if (staIndex > 0 && endIndex > 0 && endIndex > staIndex)
                            {
                                code = prjName.Substring(staIndex + 1, endIndex - staIndex - 1);
                                if (!code.IsChina())
                                {
                                    prjName = prjName.Remove(staIndex);
                                }
                                else
                                {
                                    code = string.Empty;
                                }
                            }
                        }
                        beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.gzggzy.cn" + tr.Columns[1].GetATagHref();
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }

                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "xx-text")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.ToHtml();
                            inviteCtx = HtmlTxt.Replace("</p>", "\r\n").Replace("<br />", "\r\n").Replace("<br/>", "\r\n").ToCtxString();
                            if (string.IsNullOrEmpty(code))
                            {
                                // No code in the title: fall back to extracting it from the body text.
                                code = inviteCtx.GetCodeRegex();
                            }
                            buildUnit = inviteCtx.GetBuildRegex();

                            if (buildUnit.Contains("联系"))
                            {
                                // Cut off everything from "联系" onwards.
                                buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系"));
                            }
                            prjAddress = inviteCtx.GetAddressRegex();
                            msgType    = "广州公共资源交易中心";
                            specType   = "政府采购";
                            inviteType = inviteCtx.GetRegex("项目类别", true, 50);
                            if (string.IsNullOrEmpty(inviteType))
                            {
                                inviteType = prjName.GetInviteBidType();
                            }
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州政府采购", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);

                            // Collect attachment links from the detail body; relative links are
                            // prefixed with the site root.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int k = 0; k < fileNode.Count; k++)
                                {
                                    ATag fileAtag = fileNode[k].GetATag();
                                    if (fileAtag.IsAtagAttach())
                                    {
                                        string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                                        string fileLink = fileAtag.Link;
                                        if (!fileLink.ToLower().Contains("http"))
                                        {
                                            fileLink = "http://www.gzggzy.cn" + fileAtag.Link;
                                        }
                                        base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                                    }
                                }
                            }
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }

            return(list);
        }
Beispiel #19
0
        /// <summary>
        /// Crawls the paged ASP.NET listing at <c>SiteUrl</c> (pages after the first are requested
        /// by posting the view state back), downloads each row's detail page and converts it into
        /// an <c>InviteInfo</c> record. The first image of the detail body is saved as an
        /// attachment when no record with the same URL exists yet, and attachment links are
        /// appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected;
        /// when true, every listing page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "cNavBar_cTotalPages")));

            if (pageNode != null && pageNode.Count > 0)
            {
                string temp = pageNode[0].ToNodePlainString();
                try
                {
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: keep the single-page default when the page count is not numeric.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Post back the state of the most recently downloaded page to request page i.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "cID",
                        "cFlag",
                        "Input",
                        "Left1:cID",
                        "Left1:cFlag",
                        "cNavBar:cPageSize",
                        "cNavBar:cPageIndex",
                        "Foot1:ddlLink1",
                        "Foot1:ddlLink2",
                        "Foot1:ddlLink3",
                        "Foot1:ddlLink4",
                        "Foot1:ddlLink5",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        viewState,
                        "12004",
                        "2",
                        "",
                        "12004",
                        "2",
                        "12",
                        i.ToString(),
                        "",
                        "",
                        "",
                        "",
                        "",
                        eventValidation
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "95%")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        // The matched node is not a table tag; nothing to iterate on this page.
                        continue;
                    }
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        string   code = string.Empty, prjName = string.Empty, beginDate = string.Empty, InfoUrl = string.Empty;

                        if (tr.ColumnCount < 2)
                        {
                            // Columns 0 and 1 are read below; a shorter row would throw and abort the crawl.
                            continue;
                        }
                        ATag atag = tr.Columns[0].GetATag();
                        if (atag == null)
                        {
                            // Defensive: a row without a link has no detail page to follow.
                            continue;
                        }

                        beginDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.jinwan.gov.cn/" + atag.Link.GetReplace("../");
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "fonth21")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            parser = new Parser(new Lexer(htmldtl));
                            NodeList nameNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "fonth19")));
                            if (nameNode == null || nameNode.Count == 0)
                            {
                                // The title cell is missing, so the page does not have the expected
                                // layout; skip it instead of failing on nameNode[0].
                                continue;
                            }

                            prjName = System.Web.HttpUtility.HtmlDecode(nameNode[0].ToNodePlainString()).Trim();

                            string buildUnit = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, endDate = string.Empty, remark = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                            // The detail HTML is lower-cased once here; later uses rely on that.
                            HtmlTxt   = dtlNode.AsHtml().ToLower();
                            inviteCtx = HtmlTxt.GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                            // Rewrite the first image's src so it points at the site root.
                            Parser   imgParser = new Parser(new Lexer(HtmlTxt));
                            NodeList imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                            string   src       = string.Empty;
                            if (imgNode != null && imgNode.Count > 0)
                            {
                                ImageTag img    = imgNode[0] as ImageTag;
                                string   imgUrl = img == null ? null : img.GetAttribute("src");
                                if (!string.IsNullOrEmpty(imgUrl))
                                {
                                    src     = "http://www.jinwan.gov.cn/" + imgUrl;
                                    HtmlTxt = HtmlTxt.GetReplace(imgUrl, src);
                                }
                            }

                            specType   = "政府采购";
                            inviteType = prjName.GetInviteBidType();
                            msgType    = "珠海市金湾区人民政府";

                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "珠海市区", "金湾区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            if (!string.IsNullOrEmpty(src))
                            {
                                // The URL comes from scraped HTML, so embedded single quotes are doubled
                                // before it is placed inside the SQL string literal.
                                string safeUrl = (info.InfoUrl ?? string.Empty).Replace("'", "''");
                                string sql     = string.Format("select Id from InviteInfo where InfoUrl='{0}'", safeUrl);
                                object obj     = ToolDb.ExecuteScalar(sql);
                                if (obj == null || obj.ToString() == "")
                                {
                                    // No stored record with this URL: download and save the image attachment.
                                    try
                                    {
                                        BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                        if (attach != null)
                                        {
                                            ToolDb.SaveEntity(attach, "");
                                        }
                                    }
                                    catch
                                    {
                                        // Best effort: a failed image download must not stop the crawl.
                                    }
                                }
                            }
                            // Collect attachment links from the detail body; relative links are
                            // prefixed with the site root.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int k = 0; k < aNode.Count; k++)
                                {
                                    ATag a = aNode[k].GetATag();
                                    if (a.IsAtagAttach())
                                    {
                                        string link = string.Empty;
                                        if (a.Link.ToLower().Contains("http"))
                                        {
                                            link = a.Link;
                                        }
                                        else
                                        {
                                            link = "http://www.jinwan.gov.cn/" + a.Link;
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }

                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #20
0
        /// <summary>
        /// Crawls the paged result listing starting at <c>SiteUrl</c>, downloads the detail page of
        /// each list entry and converts it into a <c>BidInfo</c> record. Attachment links found in
        /// the detail HTML are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as <c>MaxCount</c> records have been collected;
        /// when true, every listing page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records; an empty list when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <BidInfo>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "pagination")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The second-to-last pager link is read as the page count; the try also
                    // absorbs the out-of-range index when the pager has a single link.
                    string temp = pageNode[pageNode.Count - 2].ToNodePlainString();
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: keep the single-page default when the pager cannot be parsed.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.cfcpn.com/plist/jieguo?pageNo=" + i + "&kflag=0&keyword=&keywordType=&province=&city=&typeOne=", Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "cfcpn_list_content text-left")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        INode node = listNode[j];
                        ATag  aTag = node.GetATag();
                        if (aTag == null)
                        {
                            // Defensive: an entry without a link has no detail page to follow.
                            continue;
                        }
                        beginDate = node.ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://www.cfcpn.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList telNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("p"), new HasAttributeFilter("class", "cfcpn_news_title")));
                        if (telNode != null && telNode.Count > 0)
                        {
                            prjName = telNode.AsHtml();
                            prjName = prjName.ToCtxString();
                        }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "news_content")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt = dtlNode.AsHtml();
                            bidCtx  = HtmlTxt.GetReplace("<br/>,</p>,<br>,<br />,</div>", "\r\n").ToCtxString();
                            // Nine passes, exactly as in the original call chain, so that several
                            // consecutive line breaks after "一包:" are folded onto the same line.
                            for (int pass = 0; pass < 9; pass++)
                            {
                                bidCtx = bidCtx.GetReplace("一包:\r\n", "一包:");
                            }
                            buildUnit  = bidCtx.GetBuildRegex();
                            prjAddress = bidCtx.GetAddressRegex();
                            code       = bidCtx.GetCodeRegex().GetCodeDel();

                            bidUnit = bidCtx.GetBidRegex();
                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                bidUnit = bidCtx.GetRegex("成交候选人,第一中标候选人名称,一包").GetReplace("名称");
                            }
                            bidMoney = bidCtx.GetMoneyRegex();
                            prjMgr   = bidCtx.GetMgrRegex();

                            // When any of these markers appears, the record is stored with the
                            // literal bidder "废标", no manager and an amount of "0".
                            if (bidUnit.Contains("废标") || bidCtx.Contains("废除原因") || bidCtx.Contains("废止原因") || bidCtx.Contains("废标"))
                            {
                                bidUnit  = "废标";
                                prjMgr   = string.Empty;
                                bidMoney = "0";
                            }

                            if (string.IsNullOrEmpty(bidUnit))
                            {
                                // Fallback: pair the first two rows of the bordered table
                                // column by column and run the extractors on that text.
                                parser = new Parser(new Lexer(HtmlTxt));
                                NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("border", "1")));
                                if (tableNode != null && tableNode.Count > 0)
                                {
                                    TableTag dtlTable = tableNode[0] as TableTag;
                                    string   ctx      = string.Empty;
                                    if (dtlTable != null && dtlTable.RowCount > 1)
                                    {
                                        try
                                        {
                                            for (int r = 0; r < dtlTable.Rows[0].ColumnCount; r++)
                                            {
                                                ctx += dtlTable.Rows[0].Columns[r].ToNodePlainString() + ":";
                                                ctx += dtlTable.Rows[1].Columns[r].ToNodePlainString() + "\r\n";
                                            }
                                        }
                                        catch
                                        {
                                            // Best effort: keep whatever was paired before the failure
                                            // (e.g. row 1 having fewer cells than row 0).
                                        }
                                        bidUnit = ctx.GetBidRegex();
                                        if (bidMoney == "0")
                                        {
                                            bidMoney = ctx.GetMoneyRegex();
                                        }
                                        if (string.IsNullOrEmpty(prjMgr))
                                        {
                                            prjMgr = ctx.GetMgrRegex();
                                        }
                                    }
                                }
                            }
                            // Strip leftover "&#160" entity fragments from the extracted fields.
                            bidUnit    = bidUnit.GetReplace("名称,&#160", "");
                            buildUnit  = buildUnit.GetReplace("&#160");
                            prjAddress = prjAddress.GetReplace("&#160");
                            prjName    = prjName.GetReplace("&#160");
                            code       = code.GetReplace("&#160");
                            prjMgr     = prjMgr.GetReplace("&#160");
                            specType   = "政府采购";
                            bidType    = prjName.GetInviteBidType();
                            msgType    = "中国金融集中采购网";

                            BidInfo info = ToolDb.GenBidInfo("全国", "金融专项采购", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            // Collect attachment links from the detail body; relative links are
                            // prefixed with the site root.
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aNode != null && aNode.Count > 0)
                            {
                                for (int a = 0; a < aNode.Count; a++)
                                {
                                    ATag file = aNode[a].GetATag();
                                    if (file.IsAtagAttach())
                                    {
                                        string link = file.Link;
                                        if (!link.ToLower().Contains("http"))
                                        {
                                            link = "http://www.cfcpn.com/" + file.Link;
                                        }
                                        base.AttachList.Add(ToolDb.GenBaseAttach(file.LinkText, info.Id, link));
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #21
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List <BidInfo>();
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            object[] objvalues = smsTypeJson["rows"] as object[];
            foreach (object objValue in objvalues)
            {
                Dictionary <string, object> dic = (Dictionary <string, object>)objValue;
                string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty,
                       bidDate = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, remark = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                string ziGeDengJi = string.Empty, ziGeZhengShu = string.Empty, zbFangShi = string.Empty;
                code    = Convert.ToString(dic["bdBH"]);
                prjName = Convert.ToString(dic["bdName"]);
                if (prjName.Contains("测试"))
                {
                    continue;
                }
                string sel = Convert.ToString(dic["zbgsStartTime"]);
                try
                {
                    beginDate = ToolHtml.GetDateTimeByLong(Convert.ToInt64(sel)).ToString();
                }
                catch { }
                string end = Convert.ToString(dic["zbgsEndTime"]);
                try
                {
                    endDate = ToolHtml.GetDateTimeByLong(Convert.ToInt64(end)).ToString();
                }
                catch { }
                try
                {
                    bidType = Convert.ToString(dic["gcLeiXing2"]);
                }
                catch (Exception ex)
                {
                    bidType = ToolHtml.GetInviteTypes(prjName);
                }
                InfoUrl = Convert.ToString(dic["detailUrl"]);
                bool   isJson = false;
                string urll = string.Empty;
                List <Dictionary <string, object> > listAttachs = new List <Dictionary <string, object> >();
                try
                {
                    urll    = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=4&id=" + dic["dbZhongBiaoJieGuoGuid"];
                    HtmlTxt = this.ToolWebSite.GetHtmlByUrl(urll).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                }
                catch (Exception ex)
                {
                    Logger.Error(prjName);
                    continue;
                }
                try
                {
                    if (string.IsNullOrWhiteSpace(HtmlTxt))
                    {
                        string htmldtl = string.Empty;
                        isJson = true;
                        try
                        {
                            urll    = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryZbgs.do?guid=" + dic["dbZhongBiaoJieGuoGuid"] + "&ggGuid=&bdGuid=";
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(urll);
                        }
                        catch (Exception ex)
                        {
                            Logger.Error(prjName);
                            continue;
                        }
                        Dictionary <string, object> dtlJsons = (Dictionary <string, object>)serializer.DeserializeObject(htmldtl);

                        buildUnit = Convert.ToString(dtlJsons["zbrAndLht"]);
                        bidUnit   = Convert.ToString(dtlJsons["tbrName"]);
                        bidMoney  = Convert.ToString(dtlJsons["zhongBiaoJE"]);
                        try
                        {
                            bidMoney = (decimal.Parse(bidMoney) / 1000000).ToString();
                        }
                        catch { }
                        prjMgr = Convert.ToString(dtlJsons["xiangMuJiLi"]);
                        Dictionary <string, object> gg = null;
                        try
                        {
                            gg = dtlJsons["gg"] as Dictionary <string, object>;
                        }
                        catch { }
                        Dictionary <string, object> bd = null;
                        Dictionary <string, object> gc = null;
                        Dictionary <string, object> xm = null;
                        try
                        {
                            bd = dtlJsons["bd"] as Dictionary <string, object>;
                        }
                        catch { }
                        try
                        {
                            gc = bd["gc"] as Dictionary <string, object>;
                        }
                        catch { }
                        try
                        {
                            xm = bd["xm"] as Dictionary <string, object>;
                        }
                        catch { }
                        try
                        {
                            if (prjMgr.Contains("----;"))
                            {
                                prjMgr = null;
                            }
                            ziGeDengJi = Convert.ToString(dtlJsons["ziGeDengJi"]);
                            if (ziGeDengJi.Contains("----;"))
                            {
                                ziGeDengJi = null;
                            }
                            ziGeZhengShu = Convert.ToString(dtlJsons["ziGeZhengShu"]);
                            if (ziGeZhengShu.Contains("----;"))
                            {
                                ziGeZhengShu = null;
                            }
                            zbFangShi = Convert.ToString(gc["zbFangShi"]);

                            if (zbFangShi.Contains("1"))
                            {
                                zbFangShi = "公开招标";
                            }
                            else if (zbFangShi.Contains("2"))
                            {
                                zbFangShi = "邀请招标";
                            }
                            else if (zbFangShi.Contains("4"))
                            {
                                zbFangShi = "单一来源";
                            }
                            else if (zbFangShi.Contains("5"))
                            {
                                zbFangShi = "预选招标子工程";
                            }
                        }
                        catch { }
                        string   htl      = this.ToolWebSite.GetHtmlByUrl(InfoUrl);
                        Parser   parser   = new Parser(new Lexer(htl));
                        NodeList nodelist = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "right_bg")));
                        if (nodelist != null && nodelist.Count > 0)
                        {
                            HtmlTxt = nodelist.AsHtml();
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcBH\"></span>", "<span id=\"gcBH\">" + code + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcName\"></span>", "<span id=\"gcBH\">" + gc["gcName"] + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"bdName\"></span>", "<span id=\"bdName\">" + prjName + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmBH\"></span>", "<span id=\"xmBH\">" + xm["xm_BH"] + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmName\"></span>", "<span id=\"xmName\">" + xm["xm_Name"] + "</span>");
                            }
                            catch { }
                            try
                            {
                                long zbgsStartTime = Convert.ToInt64(dtlJsons["zbgsStartTime"]);
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbgsStartTime\"></span>", "<span id=\"zbgsStartTime\">" + ToolHtml.GetDateTimeByLong(zbgsStartTime) + "</span>");
                            }
                            catch { }
                            try
                            {
                                long zbgsEndTime = Convert.ToInt64(dtlJsons["zbgsEndTime"]);
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbgsEndTime\"></span>", "<span id=\"zbgsEndTime\">" + ToolHtml.GetDateTimeByLong(zbgsEndTime) + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbRName\"></span>", "<span id=\"zbRName\">" + gc["zbRName"] + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbdlJG\"></span>", "<span id=\"zbdlJG\">" + gc["creatorName"] + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbFangShi\"></span>", "<span id=\"zbFangShi\">" + zbFangShi + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"tbrName\"></span>", "<span id=\"tbrName\">" + dtlJsons["tbrName"] + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoJE\"></span>", "<span id=\"zhongBiaoJE\">" + bidMoney + "万元</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoGQ\"></span>", "<span id=\"zhongBiaoGQ\">" + dtlJsons["zhongBiaoGQ"] + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"xiangMuJiLi\"></span>", "<span id=\"xiangMuJiLi\">" + prjMgr + "</span>");
                            }
                            catch { }
                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeDengJi\"></span>", "<span id=\"ziGeDengJi\">" + ziGeDengJi + "</span>");
                            }
                            catch { }

                            try
                            {
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeZhengShu\"></span>", "<span id=\"ziGeZhengShu\">" + ziGeZhengShu + "</span>");
                            }
                            catch { }
                            try
                            {
                                string zanding = string.IsNullOrWhiteSpace(Convert.ToString(dtlJsons["isZanDingJinE"])) ? "否" : "是";
                                HtmlTxt = HtmlTxt.GetReplace("<span id=\"isZanDingJinE\"></span>", "<span id=\"isZanDingJinE\">" + zanding + "</span>");
                            }
                            catch { }
                        }
                        try
                        {
                            string fileUrl  = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + dtlJsons["ztbFileGroupGuid"];
                            string fileJson = this.ToolWebSite.GetHtmlByUrl(fileUrl);
                            Dictionary <string, object> fileDic = (Dictionary <string, object>)serializer.DeserializeObject(fileJson);
                            object[] objFile = fileDic["rows"] as object[];

                            foreach (object file in objFile)
                            {
                                Dictionary <string, object> attach = file as Dictionary <string, object>;
                                listAttachs.Add(attach);
                            }
                        }
                        catch { }
                    }
                }
                catch { continue; }



                bidCtx = HtmlTxt.Replace("<br />", "\r\n").Replace("<BR>", "\r\n").Replace("</P>", "\r\n").ToCtxString();
                if (!isJson)
                {
                    prjAddress = bidCtx.GetAddressRegex();
                    buildUnit  = bidCtx.GetBuildRegex();
                    bidUnit    = bidCtx.GetBidRegex();
                    bidMoney   = bidCtx.GetMoneyRegex();
                    prjMgr     = bidCtx.GetMgrRegex();
                    if (string.IsNullOrEmpty(code))
                    {
                        code = bidCtx.GetCodeRegex();
                    }
                }
                msgType  = "深圳市建设工程交易中心宝安分中心";
                specType = "建设工程";
                bidType  = "小型工程";
                BidInfo info = ToolDb.GenBidInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return(null);
                }

                sqlCount++;
                if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                {
                    if (!isJson)
                    {
                        Parser   parser   = new Parser(new Lexer(HtmlTxt));
                        NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (fileNode != null && fileNode.Count > 0)
                        {
                            for (int f = 0; f < fileNode.Count; f++)
                            {
                                ATag tag = fileNode[f] as ATag;
                                if (tag.IsAtagAttach() || tag.Link.ToLower().Contains("downloadfile"))
                                {
                                    try
                                    {
                                        BaseAttach attach = null;
                                        string     link   = string.Empty;
                                        if (tag.Link.ToLower().Contains("http"))
                                        {
                                            link = tag.Link;
                                            if (link.StartsWith("\\"))
                                            {
                                                link = link.Substring(link.IndexOf("\\"), link.Length - link.IndexOf("\\"));
                                            }
                                            if (link.EndsWith("//"))
                                            {
                                                link = link.Remove(link.LastIndexOf("//"));
                                            }
                                            link = link.GetReplace("\\", "");
                                        }
                                        else
                                        {
                                            link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                                        }
                                        attach = ToolHtml.GetBaseAttachByUrl(link, tag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                        if (attach != null)
                                        {
                                            ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                        }
                                    }
                                    catch { continue; }
                                }
                            }
                        }
                    }
                    else if (listAttachs.Count > 0)
                    {
                        foreach (Dictionary <string, object> attach in listAttachs)
                        {
                            BaseAttach attachBase = null;
                            try
                            {
                                string attachName = Convert.ToString(attach["attachName"]);
                                string attachId   = Convert.ToString(attach["attachGuid"]);
                                string link       = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachId;

                                attachBase = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                if (attachBase != null)
                                {
                                    ToolDb.SaveEntity(attachBase, "SourceID,AttachServerPath");
                                }
                            }
                            catch { }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #22
0
        /// <summary>
        /// Crawls the paged listing that starts at <c>SiteUrl</c> and continues on
        /// <c>http://wanjiang.dg.gov.cn/zbgs-{page}.html</c>, building one
        /// <c>InviteInfo</c> per listing row whose detail page contains a
        /// <c>div.about-nn</c> element. Attachment links found in that element are
        /// appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the result list holds
        /// <c>MaxCount</c> entries; when <c>true</c>, every page is processed.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> records. The list is empty when the first
        /// listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot = "http://wanjiang.dg.gov.cn/";
            // After every 20 collected records the crawl sleeps for 500 seconds.
            // NOTE(review): presumably a politeness throttle for the remote site - confirm the duration is intended.
            const int pauseEveryRecords = 20;
            const int pauseMilliseconds = 1000 * 500;

            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            // NOTE(review): the first page is fetched without an explicit encoding while
            // every later request passes Encoding.UTF8 - confirm the default is equivalent.
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl);
            }
            catch
            {
                return(list);
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "paging")));

            if (sNode != null && sNode.Count > 0)
            {
                // The page count is the text between ",共" and "页" in the paging div.
                try
                {
                    string temp = sNode.AsString().GetReplace(" ").GetRegexBegEnd(",共", "页");
                    if (!int.TryParse(temp, out pageInt))
                    {
                        pageInt = 1;
                    }
                }
                catch { pageInt = 1; }
            }
            if (pageInt < 1)
            {
                // A non-positive count would skip the already downloaded first page.
                pageInt = 1;
            }

            // Strips script/style/xml islands from detail pages before parsing; built once for all rows.
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(siteRoot + "zbgs-" + i + ".html", Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "Party_news")), true), new TagNameFilter("p")));
                if (viewList == null || viewList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < viewList.Count; j++)
                {
                    ATag aTag = viewList[j].GetATag();
                    if (aTag == null)
                    {
                        // A paragraph without a link has no detail page; skip it instead of
                        // failing the whole crawl on a null dereference.
                        continue;
                    }

                    string beginDate = viewList[j].ToPlainTextString().GetDateRegex();
                    string prjName   = aTag.LinkText.GetReplace("[" + beginDate + "]");
                    string InfoUrl   = siteRoot + aTag.Link.GetReplace("./");
                    string htmDtl    = string.Empty;
                    try
                    {
                        htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                        htmDtl = regexHtml.Replace(htmDtl, "");
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmDtl));
                    NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "about-nn")));
                    if (dtl == null || dtl.Count == 0)
                    {
                        continue;
                    }

                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    // NOTE(review): the stored HTML is lower-cased, so attachment hrefs read
                    // from it below are lower-cased too - confirm the server paths are case-insensitive.
                    string HtmlTxt   = dtl.AsHtml().ToLower();
                    string inviteCtx = HtmlTxt.GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string prjAddress = inviteCtx.GetAddressRegex();
                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();

                    // Cut trailing "地址" / "招标代理" segments off the extracted build unit.
                    if (buildUnit.Contains("地址"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址"));
                    }
                    if (buildUnit.Contains("招标代理"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
                    }

                    string specType   = "政府采购";
                    string inviteType = prjName.GetInviteBidType();
                    string msgType    = "东莞市万江区办事处办公室";

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "万江区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Register every anchor in the detail HTML that IsAtagAttach() accepts.
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k].GetATag();
                            if (a.IsAtagAttach())
                            {
                                // Links without "http" are treated as relative to the site root.
                                string link = a.Link.ToLower().Contains("http") ? a.Link : siteRoot + a.Link;
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                    if (list.Count % pauseEveryRecords == 0)
                    {
                        Thread.Sleep(pauseMilliseconds);
                    }
                }
            }
            return(list);
        }
Beispiel #23
0
        /// <summary>
        /// Crawls the listing embedded as an <c>&lt;xml&gt;</c> island in the page at
        /// <c>SiteUrl</c>. Each entry's detail page is downloaded and classified by its
        /// <c>&lt;title&gt;</c>: titles containing "中标" produce a <c>BidInfo</c>, titles
        /// containing "通知" produce a <c>NoticeInfo</c>, and everything else produces an
        /// <c>InviteInfo</c>. Attachment links found in the detail HTML are appended to
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as the result list holds
        /// <c>MaxCount</c> entries.
        /// </param>
        /// <returns>
        /// A mixed list of <c>BidInfo</c>, <c>NoticeInfo</c> and <c>InviteInfo</c> records.
        /// The list is empty when the listing page cannot be downloaded or does not
        /// contain an <c>&lt;xml&gt;...&lt;/xml&gt;</c> section.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string siteRoot   = "http://www.shatian.gov.cn/";
            const string detailRoot = "http://www.shatian.gov.cn/publicfiles/business/htmlfiles/";

            IList  list = new ArrayList();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return(list); }

            if (string.IsNullOrEmpty(html))
            {
                return(list);
            }

            // IndexOf returns -1 when the marker is absent; Substring would then throw,
            // so a page without a complete <xml>...</xml> island yields an empty result.
            int startIndex = html.IndexOf("<xml", StringComparison.Ordinal);
            int endIndex   = startIndex < 0 ? -1 : html.IndexOf("</xml>", startIndex, StringComparison.Ordinal);
            if (startIndex < 0 || endIndex < 0)
            {
                return(list);
            }

            // The island's custom tags are renamed to HTML tags the parser can filter on.
            // "infourl" must be replaced before "info", otherwise it would become "divurl".
            // NOTE(review): the text is lower-cased and "info" is replaced everywhere, which
            // also affects the URL text inside the entries - confirm this is harmless.
            string   xmlstr   = html.Substring(startIndex, endIndex - startIndex).ToLower().GetReplace("infourl", "span").GetReplace("info", "div").GetReplace("publishedtime", "p");
            Parser   parser   = new Parser(new Lexer(xmlstr));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("div"));

            if (pageNode == null || pageNode.Count == 0)
            {
                return(list);
            }

            for (int i = 0; i < pageNode.Count; i++)
            {
                parser = new Parser(new Lexer(pageNode[i].ToHtml()));
                NodeList dateNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("p"));
                parser.Reset();
                NodeList urlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("span"));
                if (urlNode == null || urlNode.Count == 0)
                {
                    // Without a URL there is no detail page to fetch.
                    continue;
                }

                string beginDate = (dateNode != null && dateNode.Count > 0)
                    ? dateNode[0].ToPlainTextString().GetDateRegex()
                    : string.Empty;
                string infoUrl = detailRoot + urlNode[0].ToPlainTextString();
                string htmldtl = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                }
                catch { continue; }

                parser = new Parser(new Lexer(htmldtl));
                NodeList titleNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("title"));
                if (titleNode == null || titleNode.Count == 0)
                {
                    // The title decides the record type; skip pages that have none.
                    continue;
                }
                string prjName = titleNode[0].ToNodePlainString();

                if (prjName.Contains("中标"))
                {
                    // Bid result announcement.
                    parser.Reset();
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string endDate = string.Empty, otherType = string.Empty;
                        string HtmlTxt = dtlNode.AsHtml();
                        string bidCtx  = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();

                        string buildUnit = bidCtx.GetBuildRegex();
                        string bidUnit   = bidCtx.GetBidRegex();
                        string bidMoney  = bidCtx.GetMoneyRegex();
                        string prjMgr    = bidCtx.GetMgrRegex();
                        string code      = bidCtx.GetCodeRegex();

                        string specType = "政府采购";
                        string bidType  = prjName.GetInviteBidType();
                        string msgType  = "东莞市沙田镇政府";

                        BidInfo info = ToolDb.GenBidInfo("广东省", "东莞市区", "沙田镇", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, HtmlTxt);
                        list.Add(info);
                        foreach (KeyValuePair<string, string> found in FindShatianAttachmentLinks(HtmlTxt, siteRoot))
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(found.Key, info.Id, found.Value));
                        }
                    }
                }
                else if (prjName.Contains("通知"))
                {
                    // Notice.
                    parser.Reset();
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "concent")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string InfoType = string.Empty, prjCode = string.Empty, buildUnit = string.Empty;
                        string htmlTxt  = dtlNode.AsHtml();
                        string InfoCtx  = htmlTxt.ToCtxString();

                        NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "东莞市区", "沙田镇", string.Empty, prjName, InfoType, InfoCtx, beginDate, string.Empty, "东莞市沙田镇政府", infoUrl, prjCode, buildUnit, string.Empty, string.Empty, "政府采购", string.Empty, htmlTxt);
                        list.Add(info);
                        foreach (KeyValuePair<string, string> found in FindShatianAttachmentLinks(htmlTxt, siteRoot))
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(found.Key, info.Id, found.Value));
                        }
                    }
                }
                else
                {
                    // Anything else is treated as an invitation to tender.
                    parser.Reset();
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;
                        // Only the first matching cell is used here, unlike the bid branch.
                        string HtmlTxt   = dtlNode[0].ToHtml();
                        string inviteCtx = HtmlTxt.GetReplace("</p>", "\r\n").ToCtxString();

                        string buildUnit  = inviteCtx.GetBuildRegex();
                        string prjAddress = inviteCtx.GetAddressRegex();
                        string code       = inviteCtx.GetCodeRegex();

                        string specType   = "政府采购";
                        string inviteType = prjName.GetInviteBidType();
                        string msgType    = "东莞市沙田镇政府";

                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "沙田镇", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, infoUrl, HtmlTxt);
                        list.Add(info);
                        foreach (KeyValuePair<string, string> found in FindShatianAttachmentLinks(HtmlTxt, siteRoot))
                        {
                            base.AttachList.Add(ToolDb.GenBaseAttach(found.Key, info.Id, found.Value));
                        }
                    }
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return(list);
                }
            }
            return(list);
        }

        /// <summary>
        /// Scans <paramref name="htmlText"/> for anchors that <c>IsAtagAttach()</c> accepts
        /// and returns their link text (key) and download URL (value), in document order.
        /// Links that do not contain "http" are prefixed with <paramref name="siteRoot"/>.
        /// </summary>
        private static List<KeyValuePair<string, string>> FindShatianAttachmentLinks(string htmlText, string siteRoot)
        {
            List<KeyValuePair<string, string>> links = new List<KeyValuePair<string, string>>();

            Parser   parser  = new Parser(new Lexer(htmlText));
            NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors == null || anchors.Count == 0)
            {
                return links;
            }

            for (int k = 0; k < anchors.Count; k++)
            {
                ATag a = anchors[k].GetATag();
                if (a == null || !a.IsAtagAttach())
                {
                    continue;
                }

                string link = a.Link.ToLower().Contains("http") ? a.Link : siteRoot + a.Link;
                links.Add(new KeyValuePair<string, string>(a.LinkText, link));
            }
            return links;
        }
Beispiel #24
0
        /// <summary>
        /// Downloads the page at <c>SiteUrl</c>, walks the rows of three of its
        /// tables (those with <c>width="95%"</c>, at positions 10, 4 and 2 of the
        /// match list), fetches the detail page each row points to and saves a
        /// notify record for it through <c>ToolDb.SaveEntity</c>, followed by any
        /// attachment links found on that detail page.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> records have been
        /// submitted for saving.
        /// </param>
        /// <returns>Always null; records are saved directly rather than returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
            }
            catch (Exception)
            {
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "95%")));

            if (nodeList == null || nodeList.Count == 0)
            {
                return(null);
            }

            // Positions of the tables to read, in processing order. A page that
            // yields fewer matching tables contributes fewer sections instead of
            // failing on an out-of-range index.
            int[] tableIndexes = new int[] { 10, 4, 2 };
            foreach (int tableIndex in tableIndexes)
            {
                if (tableIndex >= nodeList.Count)
                {
                    continue;
                }
                TableTag table = nodeList[tableIndex] as TableTag;
                if (table == null)
                {
                    continue;
                }
                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 2)
                    {
                        // both the title cell and the date cell are read below
                        continue;
                    }

                    string infoScorce  = string.Empty;
                    string infoType    = "政策法规";
                    string headName    = tr.Columns[0].ToNodePlainString();
                    string releaseTime = tr.Columns[1].ToPlainTextString().GetDateRegex();
                    // the detail path is the quoted argument inside the onclick handler
                    string infoUrl = "http://market.meizhou.gov.cn" + tr.Columns[0].GetATagValue("onclick").GetRegexBegEnd(",'", "',");
                    string htldtl  = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // detail page unavailable: skip this row, keep crawling
                        continue;
                    }
                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    // prefer the second centered table when there is one
                    string ctxHtml = dtlList.Count > 1 ? dtlList[1].ToHtml() : dtlList.ToHtml();
                    string infoCtx = ctxHtml.ToCtxString().Replace("&gt;", "");
                    string msgType = MsgTypeCosnt.MeiZhouMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "梅州市区", string.Empty, infoCtx, infoType);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }
                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        // attachments are only collected for records that were saved
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null)
                    {
                        continue;
                    }
                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag aTag = aNode[a] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }
                        try
                        {
                            BaseAttach baseInfo = ToolHtml.GetBaseAttach("http://market.meizhou.gov.cn" + aTag.Link, aTag.LinkText, info.Id);
                            if (baseInfo != null)
                            {
                                ToolDb.SaveEntity(baseInfo, string.Empty);
                            }
                        }
                        catch
                        {
                            // best effort: a failing attachment must not stop the crawl
                        }
                    }
                }
            }
            return(null);
        }
Beispiel #25
0
        /// <summary>
        /// Crawls the paged list at <c>SiteUrl</c>, opens the detail page of every
        /// row that contains a link and builds a <c>NoticeInfo</c> (InfoType
        /// "答疑澄清") from the <c>TDContent</c> cell. Attachment links found in the
        /// content are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> notices have
        /// been collected.
        /// </param>
        /// <returns>The collected notices; empty when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<NoticeInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // without the first page there is nothing to parse
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "huifont")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // pager text with the leading "1/" stripped is the page count
                    string temp = pageNode[0].ToNodePlainString().GetReplace("1/");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // unreadable pager: keep the single-page default
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?Paging=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "98%")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr   = table.Rows[j];
                    ATag     aTag = tr.GetATag();
                    if (aTag == null || tr.ColumnCount < 3)
                    {
                        // not a data row: needs a link and the date cell at index 2
                        continue;
                    }
                    string InfoType    = "答疑澄清";
                    string InfoTitle   = aTag.GetAttribute("title");
                    string PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl     = "http://ggzy.xjbt.gov.cn" + aTag.Link;
                    string htmldtl     = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt   = dtlNode.AsHtml();
                    string InfoCtx   = htmlTxt.GetReplace("</p>,<br />,<br/>", "\r\n").ToCtxString();
                    string prjCode   = InfoCtx.GetCodeRegex().GetCodeDel();
                    string buildUnit = InfoCtx.GetBuildRegex();
                    if (buildUnit.Contains("电话"))
                    {
                        // drop everything from the "电话" marker onwards
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("电话"));
                    }
                    NoticeInfo info = ToolDb.GenNoticeInfo("新疆维吾尔自治区", "新疆维吾尔自治区及地市", "", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "新疆生产建设兵团公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "政府采购", "建设工程", htmlTxt);
                    list.Add(info);

                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // links that do not mention "http" are resolved against the site root
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://ggzy.xjbt.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #26
0
        /// <summary>
        /// Crawls the paged list at <c>SiteUrl</c>, opens the detail page linked
        /// from the second cell of each row and builds a <c>NoticeInfo</c>
        /// (InfoType "澄清变更") from the <c>TDContent</c> cell. Attachment links
        /// found in the content are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> notices have
        /// been collected.
        /// </param>
        /// <returns>The collected notices; empty when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<NoticeInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return(list); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // the page count sits between the "总页数" and "当前页" labels
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // unreadable pager: keep the single-page default
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?Paging=" + i, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "99%")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                // the final row of the table is deliberately left out
                // NOTE(review): presumably a pager/footer row - confirm against the site
                for (int j = 0; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // link cell (index 1) and date cell (index 2) are both required
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // row without a link cannot be followed
                        continue;
                    }

                    string area        = string.Empty;
                    string prjCode     = string.Empty;
                    string InfoTitle   = aTag.GetAttribute("title");
                    string PublistTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string InfoUrl     = "http://www.gxzbtb.cn" + aTag.Link;
                    string InfoType    = "澄清变更";

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt   = dtlNode.AsHtml().GetJsString();
                    string InfoCtx   = htmlTxt.ToCtxString();
                    string buildUnit = InfoCtx.GetBuildRegex();

                    NoticeInfo info = ToolDb.GenNoticeInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "广西壮族自治区公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "政府采购", string.Empty, htmlTxt);
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k].GetATag();
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // links that do not mention "http" are resolved against the site root
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://www.gxzbtb.cn" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #27
0
        /// <summary>
        /// Crawls the AJAX-backed list of www.gsggzyjy.cn: requests <c>SiteUrl</c>
        /// once to obtain cookies, then posts <c>currentPage=N</c> to the list
        /// endpoint for each page, opens every entry's detail page and builds a
        /// <c>NoticeInfo</c> (InfoType "控制价公示") from the
        /// <c>ContentPlaceHolder1_InfoHtml</c> div. Attachment links found in the
        /// content are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> notices have
        /// been collected.
        /// </param>
        /// <returns>The collected notices; empty when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<NoticeInfo>();
            // NOTE(review): the page count is hard-coded, not read from the site
            int    pageInt   = 37;
            string html      = string.Empty;
            string cookiestr = string.Empty;
            string url       = "http://www.gsggzyjy.cn/ajax/Controls_InfoList,App_Web_rzplwhmc.ashx?_method=getCurrentData&_session=rw";

            try
            {
                // first request only fills cookiestr for the POSTs that follow
                this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
                string post = "currentPage=1\r\nQuery=";
                html = ToolHtml.GetHtmlByUrlPost(url, post, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // same "nothing collected" result the sibling crawlers return
                return(list);
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        string post = "currentPage=" + i + "\r\nQuery=";
                        html = ToolHtml.GetHtmlByUrlPost(url, post, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }
                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("li"));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }
                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // an <li> without a link is not a list entry
                        continue;
                    }
                    var dateSpan = node.GetSpan();

                    string area        = string.Empty;
                    string prjCode     = string.Empty;
                    string buildUnit   = string.Empty;
                    string InfoTitle   = aTag.GetAttribute("title");
                    string InfoType    = "控制价公示";
                    string PublistTime = dateSpan == null ? string.Empty : dateSpan.StringText;
                    string InfoUrl     = "http://www.gsggzyjy.cn" + aTag.Link;
                    string htmldtl     = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_InfoHtml")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt = dtlNode.AsHtml();
                    string InfoCtx = htmlTxt.ToCtxString();
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (tableNode != null && tableNode.Count > 0)
                    {
                        TableTag table = tableNode[0] as TableTag;
                        if (table != null)
                        {
                            // Flatten the first table into "cell:cell\r\n" text: cells at
                            // even positions (2nd, 4th, ...) end the line, the others are
                            // followed by ":".
                            System.Text.StringBuilder ctx = new System.Text.StringBuilder();
                            for (int r = 0; r < table.RowCount; r++)
                            {
                                for (int c = 0; c < table.Rows[r].ColumnCount; c++)
                                {
                                    string temp = table.Rows[r].Columns[c].ToNodePlainString();
                                    ctx.Append(temp.GetReplace(":,:"));
                                    ctx.Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                                }
                            }
                            buildUnit = ctx.ToString().GetBuildRegex();
                        }
                    }
                    NoticeInfo info = ToolDb.GenNoticeInfo("甘肃省", "甘肃省及地市", area, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "甘肃省公共资源交易中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "水利及其他工程", string.Empty, htmlTxt);

                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k].GetATag();
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // links that do not mention "http" are resolved against the site root
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://www.gsggzyjy.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #28
0
        /// <summary>
        /// Crawls the paged bid bulletin list starting at <c>SiteUrl</c> (further
        /// pages come from <c>http://new.sztc.com/bidBulletin/index_N.jhtml</c>),
        /// opens each entry's detail page and builds an <c>InviteInfo</c> from the
        /// <c>ninfo-con</c> div. Attachment links found in the content are appended
        /// to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl returns as soon as <c>MaxCount</c> entries have
        /// been collected.
        /// </param>
        /// <returns>The collected entries; empty when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<InviteInfo>();
            // number of list pages; stays 1 when the pager cannot be read
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                string pageTemp = tdNodes.AsString().Replace("&nbsp;", "");
                try
                {
                    // the page count is the text between "/" and "页" in the pager
                    pageInt = int.Parse(pageTemp.GetRegexBegEnd("/", "页"));
                }
                catch
                {
                    // unreadable pager: keep the single-page default
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://new.sztc.com/bidBulletin/index_" + i + ".jhtml");
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "lb-link")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }
                for (int j = 0; j < nodeList.Count; j++)
                {
                    ATag aTag = nodeList[j].GetATag();
                    if (aTag == null)
                    {
                        // an <li> without a link is not a list entry
                        continue;
                    }

                    string endDate   = string.Empty;
                    string remark    = string.Empty;
                    string otherType = string.Empty;

                    string prjName   = aTag.LinkText.ToNodeString().Replace(" ", "");
                    string beginDate = prjName.GetDateRegex();
                    // Strip the date from the title only when one was found:
                    // string.Replace throws ArgumentException for an empty search
                    // value, which used to abort the crawl on undated titles.
                    if (!string.IsNullOrEmpty(beginDate))
                    {
                        prjName = prjName.Replace(beginDate, "");
                    }
                    string InfoUrl    = aTag.Link;
                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "ninfo-con"), new TagNameFilter("div")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtnode.AsHtml();
                    string inviteCtx = HtmlTxt.ToCtxString();

                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string prjAddress = inviteCtx.GetAddressRegex();

                    string specType   = "政府采购";
                    string msgType    = "深圳市国际招标有限公司";
                    string inviteType = prjName.GetInviteBidType();
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    dtlparser = new Parser(new Lexer(HtmlTxt));
                    NodeList FileTag = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (FileTag != null)
                    {
                        for (int f = 0; f < FileTag.Count; f++)
                        {
                            ATag file = FileTag[f] as ATag;
                            if (file == null || !file.IsAtagAttach())
                            {
                                continue;
                            }
                            // links that do not mention "http" are resolved against the site root
                            string link = file.Link.ToLower().Contains("http")
                                ? file.Link
                                : "http://new.sztc.com/" + file.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(file.ToPlainTextString(), info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #29
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <InviteInfo>();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("name", "qPageForm")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    NodeList aNode = new Parser(new Lexer(pageNode.ToHtml())).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        string temp = aNode[aNode.Count - 2].GetATagHref().Replace("turnOverPage", "").Replace("(", "").Replace(")", "").Replace(";", "");
                        pageInt = int.Parse(temp);
                    }
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "channelCode", "pageIndex", "pageSize", "pointPageIndexId" }, new string[] {
                        "0005", i.ToString(), "15", "1"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://jieyang.gdgpo.com/queryMoreInfoList.do", nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "m_m_c_list")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                        ATag aTag = listNode[j].GetATag(1);
                        prjName   = aTag.GetAttribute("title");
                        beginDate = listNode[j].ToPlainTextString().GetDateRegex();
                        InfoUrl   = "http://jieyang.gdgpo.com" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zw_c_c_cont")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            HtmlTxt   = dtlNode.AsHtml();//.Replace("<br", "\r\n<br");
                            inviteCtx = HtmlTxt.ToCtxString();
                            code      = inviteCtx.GetCodeRegex().GetCodeDel();

                            buildUnit  = inviteCtx.GetBuildRegex();
                            prjAddress = inviteCtx.GetAddressRegex();
                            inviteType = prjName.GetInviteBidType();
                            msgType    = "揭阳市政府采购";
                            specType   = "政府采购";
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "揭阳市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int k = 0; k < fileNode.Count; k++)
                                {
                                    ATag fileAtag = fileNode[k].GetATag();
                                    if (fileAtag.IsAtagAttach())
                                    {
                                        string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                                        string fileLink = fileAtag.Link;
                                        if (!fileLink.ToLower().Contains("http"))
                                        {
                                            fileLink = "http://jieyang.gdgpo.gov.cn" + fileAtag.Link;
                                        }
                                        base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                                    }
                                }
                            }
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #30
0
        /// <summary>
        /// Crawls the paginated bid-result list at <c>SiteUrl</c>, downloads the detail page of every
        /// listed entry and converts it into a <c>BidInfo</c> record. Attachment links found in the
        /// detail HTML are appended to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>BidInfo</c> records. The list is empty when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string baseUrl = "http://www.whzbtb.com/";

            IList  list    = new List<BidInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lb_page")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text holds the page count between "分" and "页".
                    string temp = pageNode.AsString().GetRegexBegEnd("分", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the page count cannot be read, only the first page is crawled.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?page=" + (i - 1).ToString(), Encoding.Default);
                    }
                    catch
                    {
                        // Skip a list page that fails to download and carry on with the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "list")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (the loop starts at 1, as in the original listing).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];

                    // Columns 2, 4, 5 and 6 are read below; skip rows that do not have them.
                    if (tr == null || tr.ColumnCount < 7)
                    {
                        continue;
                    }

                    ATag aTag = tr.Columns[2].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string area      = string.Empty;
                    string otherType = string.Empty;
                    string bidCtx    = string.Empty;

                    string prjName  = aTag.LinkText;
                    string bidUnit  = tr.Columns[4].ToNodePlainString();
                    string bidMoney = tr.Columns[5].ToNodePlainString();
                    string endDate  = tr.Columns[6].ToPlainTextString().GetDateRegex("yyyy/MM/dd");
                    string infoUrl  = baseUrl + aTag.Link.GetReplace("../,./").Replace("&amp;", "&");

                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        // Skip an entry whose detail page cannot be downloaded.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "683")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string htmlTxt = dtlNode.AsHtml();
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    TableTag tableTag  = (tableNode != null && tableNode.Count > 1) ? tableNode[1] as TableTag : null;

                    if (tableTag != null)
                    {
                        // Flatten the second table into "label:value" lines: cells at even column
                        // indexes are emitted as labels, cells at odd indexes end the line.
                        StringBuilder ctx = new StringBuilder();
                        for (int r = 0; r < tableTag.RowCount; r++)
                        {
                            for (int c = 0; c < tableTag.Rows[r].ColumnCount; c++)
                            {
                                string temp = tableTag.Rows[r].Columns[c].ToPlainTextString().GetReplace(" ");
                                if (string.IsNullOrWhiteSpace(temp))
                                {
                                    continue;
                                }

                                ctx.Append(temp.GetReplace(":,:"));
                                ctx.Append(c % 2 == 1 ? "\r\n" : ":");
                            }
                        }
                        bidCtx = ctx.ToString();
                    }
                    else
                    {
                        bidCtx = htmlTxt.ToCtxString();
                    }

                    string code      = bidCtx.GetCodeRegex().GetReplace(" ");
                    string buildUnit = bidCtx.GetBuildRegex().GetReplace(" ");
                    string prjMgr    = bidCtx.GetMgrRegex().GetReplace(" ");
                    if (string.IsNullOrEmpty(prjMgr))
                    {
                        prjMgr = bidCtx.GetRegex("建筑师/总监/负责人").GetReplace(" ");
                    }

                    // Try the publication period first, then the bid-opening time, each with the
                    // explicit "yyyy/MM/dd" format before the default one. The fallback results
                    // were previously computed but never assigned to beginDate.
                    string beginDate = bidCtx.GetRegex("中标公示时段").GetDateRegex("yyyy/MM/dd");
                    if (string.IsNullOrEmpty(beginDate))
                    {
                        beginDate = bidCtx.GetRegex("中标公示时段").GetDateRegex();
                    }
                    if (string.IsNullOrEmpty(beginDate))
                    {
                        beginDate = bidCtx.GetRegex("开标时间").GetDateRegex("yyyy/MM/dd");
                    }
                    if (string.IsNullOrEmpty(beginDate))
                    {
                        beginDate = bidCtx.GetRegex("开标时间").GetDateRegex();
                    }

                    string msgType  = "武汉市公共资源交易中心";
                    string specType = "政府采购";
                    string bidType  = "建设工程";
                    BidInfo info = ToolDb.GenBidInfo("湖北省", "湖北省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
                    list.Add(info);

                    // Register every attachment-like link found in the detail HTML.
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links without "http" are treated as site-relative and prefixed with the base URL.
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : baseUrl + a.Link.GetReplace("../,./");

                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }