Beispiel #1
0
        /// <summary>
        /// Creates a <c>BidInfo</c> record for one list entry, appends it to <paramref name="list"/>
        /// and registers the entry's detail URL as an attachment of that record in <c>AttachList</c>.
        /// </summary>
        /// <param name="itemName">Raw title text of the list entry; cleaned with the <c>GetReplace</c> extension before use.</param>
        /// <param name="dtlUrl">Link of the entry; every "./" is removed before it is appended to "http://www.gsrc.com/".</param>
        /// <param name="begin">Date text of the entry; passed to <c>GenBidInfo</c> as the begin date.</param>
        /// <param name="list">Collection that receives the generated record.</param>
        private void AddBidInfo(string itemName, string dtlUrl, string begin, IList list)
        {
            // Values this source does not provide; they are handed to GenBidInfo as empty strings.
            string code      = string.Empty;
            string bidUnit   = string.Empty;
            string endDate   = string.Empty;
            string otherType = string.Empty;
            string bidMoney  = string.Empty;
            string prjMgr    = string.Empty;

            string beginDate = begin;
            string InfoUrl   = "http://www.gsrc.com/" + dtlUrl.Replace("./", "");

            // NOTE(review): GetReplace is a project extension; presumably it strips each
            // comma-separated token ("./", the date, "(" and ")") from the title — confirm.
            string prjName   = itemName.GetReplace("./," + begin + ",(,)");

            string msgType   = "广深铁路股份有限公司";
            string specType  = "建设工程";
            string buildUnit = "广深铁路股份有限公司";
            string bidType   = ToolHtml.GetInviteTypes(prjName);

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, "见附件", string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, "见附件");
            list.Add(info);

            // The detail page itself is stored as the record's attachment.
            BaseAttach attach = ToolDb.GenBaseAttach(prjName, info.Id, InfoUrl);
            base.AttachList.Add(attach);
        }
Beispiel #2
0
        /// <summary>
        /// Walks the paged list that starts at <c>SiteUrl</c> (follow-up pages are
        /// <c>http://www.sz-otc.com/a/zhaobiao/zhongbiao/list_36_N.html</c>), downloads the detail page
        /// of every list entry and converts it into a <c>BidInfo</c> record. Links to .doc files inside
        /// the detail content are registered in <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.</param>
        /// <returns>The collected records; an empty list when the first list page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return list;
            }

            // None of these patterns depends on the entry being processed, so they are built once.
            // NOTE(review): "(:|:)" and the doubled Replace(":", "") / Replace(",", "") calls below look
            // like a half-width/full-width pair that lost its full-width character — confirm.
            Regex regDate      = new Regex(@"\d{4}-\d{2}-\d{2}");
            Regex regexHtml    = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regBidUnit   = new Regex(@"单位(:|:)[^\r\n]+\r\n");
            Regex regCode      = new Regex(@"编号(:|:)[^\r\n]+\r\n");
            Regex regMoneyLine = new Regex(@"金额(:|:)[^\r\n]+\r\n");
            Regex regYuanLine  = new Regex(@"¥[^\r\n]+\r\n");

            // Determine the page count from the "末页" (last page) link of the pager.
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "mt20 fenye2"))), new TagNameFilter("li")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                try
                {
                    // Iterate the anchors themselves: the number of <li> items and the number of
                    // anchors inside them are not guaranteed to be equal.
                    NodeList pagerLinks = tdNodes.SearchFor(typeof(ATag), true);
                    for (int i = 0; pagerLinks != null && i < pagerLinks.Count; i++)
                    {
                        ATag pagerLink = pagerLinks[i] as ATag;
                        if (pagerLink == null || pagerLink.LinkText == null || pagerLink.Link == null)
                        {
                            continue;
                        }
                        if (pagerLink.LinkText.Contains("末页"))
                        {
                            int lastPage;
                            if (int.TryParse(pagerLink.Link.Replace("list_36_", "").Replace(".html", ""), out lastPage))
                            {
                                pageInt = lastPage;
                            }
                            break;
                        }
                    }
                }
                catch (Exception)
                {
                    // Best effort: if the pager cannot be read, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode("http://www.sz-otc.com/a/zhaobiao/zhongbiao/list_36_" + i.ToString() + ".html"), Encoding.Default);
                    }
                    catch (Exception)
                    {
                        // Skip pages that cannot be downloaded.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "zhaobiao_list"))), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                // Collected once per page; the j-th anchor is taken as the link of the j-th entry.
                NodeList itemLinks = nodeList.SearchFor(typeof(ATag), true);

                for (int j = 0; j < nodeList.Count; j++)
                {
                    // Values this source does not provide; passed to GenBidInfo as empty strings.
                    string buildUnit = string.Empty, endDate = string.Empty, prjMgr = string.Empty, otherType = string.Empty;

                    // The title is the entry text before the first '&'; the date is searched in the
                    // remainder. When there is no '&', the whole text serves for both.
                    string itemText = (nodeList[j].ToPlainTextString() ?? string.Empty).Trim();
                    int    ampIndex = itemText.IndexOf('&');
                    string prjName  = ampIndex >= 0 ? itemText.Remove(ampIndex) : itemText;

                    // Drop everything up to the first ']' and remove any further ']'.
                    int bracketIndex = prjName.IndexOf(']');
                    if (bracketIndex >= 0)
                    {
                        prjName = prjName.Substring(bracketIndex).Replace("]", "");
                    }

                    string dateText  = ampIndex >= 0 ? itemText.Substring(ampIndex) : itemText;
                    string beginDate = regDate.Match(dateText).Value;

                    ATag aTag = (itemLinks != null && j < itemLinks.Count) ? itemLinks[j] as ATag : null;
                    if (aTag == null)
                    {
                        // No link for this entry: nothing to download.
                        continue;
                    }
                    string InfoUrl = "http://www.sz-otc.com" + aTag.Link;

                    string HtmlTxt;
                    string htmldetail;
                    try
                    {
                        // The detail page is downloaded once; both the HTML snapshot and the
                        // line-broken text variant are derived from the same response.
                        string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "");

                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "right_content"), new TagNameFilter("div")));
                        HtmlTxt = dtnodeHTML.AsHtml();

                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                        htmldetail = regexHtml.Replace(htmldetail, "");
                    }
                    catch (Exception)
                    {
                        // Skip entries whose detail page cannot be downloaded or parsed.
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "right_content"), new TagNameFilter("div")));
                    string   bidCtx    = dtnode.AsString();

                    string bidUnit = regBidUnit.Match(bidCtx).Value.Replace("单位", "").Replace(":", "").Replace(":", "").Trim();
                    if (Encoding.Default.GetByteCount(bidUnit) > 150)
                    {
                        // The guard counts bytes while Substring counts characters; clamp the length
                        // so multi-byte text shorter than 150 characters no longer throws.
                        bidUnit = bidUnit.Substring(0, Math.Min(150, bidUnit.Length));
                    }

                    string code = regCode.Match(bidCtx).Value.Replace("编号", "").Replace(":", "").Replace(":", "").Trim();
                    int clickIndex = code.IndexOf("点击", StringComparison.Ordinal);
                    if (clickIndex >= 0)
                    {
                        code = code.Remove(clickIndex);
                    }

                    // First try the "金额" line; fall back to the first "¥" line when that yields "0".
                    string monerystr = regMoneyLine.Match(bidCtx).Value.Replace("金额", "").Replace(":", "").Replace(":", "").Replace(",", "").Replace(",", "").Trim();
                    string bidMoney  = ParseBidMoney(monerystr);
                    if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                    {
                        monerystr = regYuanLine.Match(bidCtx).Value.Replace("¥", "").Replace(":", "").Replace(":", "").Replace(",", "").Replace(",", "").Trim();
                        bidMoney  = ParseBidMoney(monerystr);
                    }

                    string specType = "其他";
                    string msgType  = "深圳市东方招标有限公司";
                    prjName = ToolDb.GetPrjName(prjName);
                    string bidType = ToolHtml.GetInviteTypes(prjName);

                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Register every linked .doc file of the detail content as an attachment.
                    NodeList FileTag = dtnode.SearchFor(typeof(ATag), true);
                    if (FileTag != null)
                    {
                        for (int f = 0; f < FileTag.Count; f++)
                        {
                            ATag file = FileTag[f] as ATag;
                            if (file == null || file.Link == null)
                            {
                                continue;
                            }
                            if (file.Link.ToUpper().Contains(".DOC"))
                            {
                                string     attachName = file.Link.Replace("Ads/", "").Replace(".DOC", "").Replace(".doc", "");
                                BaseAttach attach     = ToolDb.GenBaseAttach(attachName, info.Id, "http://www.sz-otc.com/" + file.Link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }

        /// <summary>
        /// Extracts the first number from <paramref name="moneyText"/>. When the text contains
        /// "万元" or "万美元" the number is returned as written; otherwise it is divided by 10000,
        /// and results below 0.1 are reported as "0".
        /// </summary>
        /// <param name="moneyText">Amount text with thousands separators already removed.</param>
        /// <returns>The amount as a string, or "0" when no usable number is found.</returns>
        private static string ParseBidMoney(string moneyText)
        {
            // Digits with an optional fractional part; unlike "[0-9]+[.]{0,1}[0-9]+" this also
            // accepts single-digit amounts such as "5".
            Match amount = Regex.Match(moneyText, @"[0-9]+(\.[0-9]+)?");
            if (!amount.Success)
            {
                return "0";
            }

            if (moneyText.Contains("万元") || moneyText.Contains("万美元"))
            {
                return amount.Value;
            }

            decimal parsed;
            if (!decimal.TryParse(amount.Value, out parsed))
            {
                return "0";
            }

            decimal converted = parsed / 10000;
            return converted < 0.1m ? "0" : converted.ToString();
        }
Beispiel #3
0
        /// <summary>
        /// Walks the paged announcement list that starts at <c>SiteUrl</c> (follow-up pages are requested
        /// from <c>http://www.gy-center.net/announce/list.jhtml</c> with a <c>page</c> parameter), downloads
        /// each entry's detail page and converts it into a <c>BidInfo</c> record. Anchors in the detail
        /// content that <c>IsAtagAttach</c> accepts are registered in <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">When <c>false</c>, the method returns as soon as <c>MaxCount</c> records have been collected.</param>
        /// <returns>The collected records; an empty list when the first list page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            int    pageInt = 1;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return list;
            }

            // The page count is read from the text between "/" and "页" inside the div with id "yema".
            Parser   parser = new Parser(new Lexer(html));
            NodeList noList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "yema")));
            if (noList != null && noList.Count > 0)
            {
                string pagerText = noList.AsString();
                try
                {
                    Regex  reg    = new Regex(@"/[^页]+页");
                    string result = reg.Match(pagerText).Value.Replace("页", "").Replace("/", "");
                    pageInt = Convert.ToInt32(result);
                }
                catch (Exception)
                {
                    // Best effort: fall back to a single page when the pager text cannot be parsed.
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.gy-center.net/announce/list.jhtml?visi_id=&cid=97&chid=&gid=&thistype=&searchcid=&keyword=&action=yes&interval=&page=" + i.ToString(), Encoding.Default);
                    }
                    catch (Exception)
                    {
                        // Skip pages that cannot be downloaded.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "tab01"))), new TagNameFilter("ul"))), new TagNameFilter("li")));
                if (dtlList == null || dtlList.Count == 0)
                {
                    continue;
                }

                // NOTE(review): the last <li> is deliberately not visited (Count - 1); presumably it is
                // not a data row — confirm against the page markup.
                for (int j = 0; j < dtlList.Count - 1; j++)
                {
                    // Values this source does not provide; passed to GenBidInfo as empty strings.
                    string endDate = string.Empty, prjMgr = string.Empty, otherType = string.Empty;

                    string itemText = dtlList[j].ToPlainTextString();
                    string itemHtml = dtlList[j].ToHtml();
                    string prjName   = ToolHtml.GetHtmlAtagValue("title", itemHtml);
                    string beginDate = ToolHtml.GetRegexDateTime(itemText);
                    string InfoUrl   = "http://www.gy-center.net/announce/" + ToolHtml.GetHtmlAtagValue("href", itemHtml);

                    string htlDtl;
                    try
                    {
                        htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                        htlDtl = System.Text.RegularExpressions.Regex.Replace(htlDtl, "(<script)[\\s\\S]*?(</script>)", "");
                    }
                    catch (Exception)
                    {
                        // Skip entries whose detail page cannot be downloaded.
                        continue;
                    }

                    parser = new Parser(new Lexer(htlDtl));
                    NodeList htlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "r_content_right_main")));
                    if (htlList == null || htlList.Count == 0)
                    {
                        // No content container on the detail page: no record is produced.
                        continue;
                    }

                    string HtmlTxt = htlList.ToHtml();
                    string bidCtx  = Regex.Replace(HtmlTxt, "<[^>]*>", "").Replace("&nbsp;", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\t\t", "").Replace("\r\r", "\r").Replace("\n\n", "\n");
                    string bidType = ToolHtml.GetInviteTypes(prjName);

                    // Flatten the first "MsoNormalTable" into "header:value" lines, pairing row 0
                    // with row 1 for at most the first seven columns.
                    string bidStr = string.Empty;
                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList bidList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoNormalTable")));
                    if (bidList != null && bidList.Count > 0)
                    {
                        try
                        {
                            TableTag tab = bidList[0] as TableTag;
                            if (tab.RowCount > 1)
                            {
                                int pairCount = Math.Min(tab.Rows[0].ColumnCount, 7);
                                // A table with a single column is ignored, as before.
                                if (pairCount > 1)
                                {
                                    for (int col = 0; col < pairCount; col++)
                                    {
                                        bidStr += tab.Rows[0].Columns[col].ToPlainTextString().ToNodeString() + ":" + tab.Rows[1].Columns[col].ToPlainTextString().ToNodeString() + "\r\n";
                                    }
                                }
                            }
                        }
                        catch (Exception)
                        {
                            // Best effort: keep whatever lines were built before the table turned out
                            // to be irregular (e.g. row 1 has fewer cells than row 0).
                        }
                    }

                    string buildUnit  = ToolHtml.GetRegexString(bidCtx, ToolHtml.BuildRegex);
                    string prjAddress = ToolHtml.GetRegexString(bidCtx, ToolHtml.AddressRegex);
                    string code       = ToolHtml.GetRegexString(bidCtx, ToolHtml.CodeRegex);

                    // Bid unit: plain text first, then the flattened table, then the text between "确认" and "为".
                    string bidUnit = ToolHtml.GetRegexString(bidCtx, ToolHtml.BidRegex);
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = ToolHtml.GetRegexString(bidStr.Replace("  ", ""), ToolHtml.BidRegex, false);
                    }
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegexBegEnd("确认", "为");
                    }

                    string bidMoney = ToolHtml.GetRegexString(bidCtx, ToolHtml.MoneyRegex);
                    bidMoney = ToolHtml.GetRegexMoney(bidMoney);
                    if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                    {
                        bidMoney = bidCtx.GetRegexBegEnd("¥", "元").GetMoney();
                    }

                    buildUnit  = ToolHtml.GetSubString(buildUnit, 150);
                    prjAddress = ToolHtml.GetSubString(prjAddress, 150);
                    code       = ToolHtml.GetSubString(code, 50);
                    bidUnit    = ToolHtml.GetSubString(bidUnit, 150);

                    bidUnit   = ToolHtml.GetStringTemp(bidUnit);
                    buildUnit = ToolHtml.GetStringTemp(buildUnit);

                    if (string.IsNullOrEmpty(code))
                    {
                        code = "见中标信息";
                    }
                    if (string.IsNullOrEmpty(prjAddress))
                    {
                        // prjAddress is normalised here but is not among the GenBidInfo arguments below.
                        prjAddress = "见中标信息";
                    }

                    string specType = "其他";
                    string msgType  = "工网在线";
                    BidInfo info = ToolDb.GenBidInfo("广东省", "电网专项工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                     bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    parser = new Parser(new Lexer(HtmlTxt));
                    NodeList nodeAtag = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (nodeAtag != null)
                    {
                        for (int a = 0; a < nodeAtag.Count; a++)
                        {
                            ATag anchor = nodeAtag[a] as ATag;
                            if (anchor == null)
                            {
                                continue;
                            }
                            if (anchor.Link.IsAtagAttach())
                            {
                                // NOTE(review): attachments are resolved against www.bidding.csg.cn although
                                // the pages come from www.gy-center.net — confirm this host is intended.
                                string     alink  = "http://www.bidding.csg.cn/" + anchor.Link;
                                BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText.Replace("&nbsp", ""), info.Id, alink);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Beispiel #4
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser    = new Parser(new Lexer(htl));
            NodeList nodeList  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a")));
            Regex    regexPage = new Regex(@"共\d+页");

            try
            {
                Regex numpage = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                ATag  link    = (ATag)nodeList[nodeList.Count - 1];
                page = Convert.ToInt32(numpage.Match(link.Link).Value.Trim());
            }
            catch (Exception)
            { }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "newtitle",
                        "totalRows",
                        "pageNO"
                    }, new string[] {
                        string.Empty,
                        "0",
                        i.ToString()
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr).Replace("<th", "<td").Replace("</th>", "</td>").Replace("&nbsp;", "");
                    }
                    catch (Exception ex) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "cnewslist")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = (TableTag)tableNodeList[0];
                    for (int j = 1; j < table.RowCount - 2; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, img = string.Empty,
                               HtmlTxt = string.Empty;
                        TableRow tr    = table.Rows[j];
                        prjName = tr.Columns[0].ToPlainTextString().Trim();
                        endDate = tr.Columns[2].ToPlainTextString().Trim();
                        ATag aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[0] as ATag;
                        InfoUrl = aTag.Link;
                        ImageTag image = aTag.SearchFor(typeof(ImageTag), true)[0] as ImageTag;
                        //beginDate = DateTime.Now.Date.ToString();
                        //if (image == null)
                        //{
                        //    beginDate = endDate;
                        //    endDate = string.Empty;
                        //}
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("<th", "<td").Replace("</th>", "</td>").Replace("</TH>", "</td>").Replace("<TH", "<td").Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            Logger.Error("InviteZhuHaiJS");
                            continue;
                        }

                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "borderTB")));
                        if (dtnode.Count > 0)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            TableTag tabletwo = (TableTag)dtnode[0];
                            for (int row = 0; row < tabletwo.RowCount; row++)
                            {
                                TableRow r = tabletwo.Rows[row];

                                for (int k = 0; k < r.ColumnCount; k++)
                                {
                                    string st  = string.Empty;
                                    string st1 = string.Empty;
                                    st = r.Columns[k].ToPlainTextString().Trim();
                                    if (k + 1 < r.ColumnCount)
                                    {
                                        st1 = r.Columns[k + 1].ToPlainTextString().Trim();
                                    }
                                    inviteCtx += st + ":" + st1 + "\r\n";
                                    if (k + 1 <= r.ColumnCount)
                                    {
                                        k++;
                                    }
                                }
                            }

                            Regex regBuidUnit = new Regex(@"(招标人|招标人/招标代理)(:|:)[^\r\n]+\r\n");
                            buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("招标人:", "").Replace("招标人/招标代理:", "").Trim();
                            Regex regPrjAddr = new Regex(@"(建设地点|项目地址|建设单位)(:|:)[^\r\n]+\r\n");
                            prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("建设单位:", "").Replace("建设地点:", "").Replace("项目地址", "").Replace(":", "").Trim();
                            if (Encoding.Default.GetByteCount(prjAddress) > 200 || prjAddress == "")
                            {
                                prjAddress = "见招标信息";
                            }
                            Regex regcode = new Regex(@"项目编号(:|:)[^\r\n]+\r\n");
                            code      = regcode.Match(inviteCtx).Value.Replace("项目编号:", "").Replace(":", "").Trim();
                            beginDate = inviteCtx.GetRegex("报名时间").GetDateRegex();
                            if (string.IsNullOrEmpty(beginDate) || DateTime.Parse(beginDate) > DateTime.Now)
                            {
                                beginDate = DateTime.Now.ToString("yyyy-MM-dd");
                            }
                            msgType   = "珠海市建设工程交易中心";
                            specType  = "建设工程";
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                            Regex regInvType = new Regex(@"[^\r\n]+[\r\n]{1}");

                            if (buildUnit == "")
                            {
                                buildUnit = "";
                            }
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "珠海市区", "",
                                                                   string.Empty, code, prjName, prjAddress, buildUnit,
                                                                   beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parserdetail.Reset();
                            NodeList nodeListtwo = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "Noprint")), true), new TagNameFilter("a")));
                            if (nodeListtwo.Count > 0)
                            {
                                ATag       aTa3g  = nodeListtwo[0] as ATag;
                                BaseAttach attach = ToolDb.GenBaseAttach("工作议程(点击下载)", info.Id, aTa3g.Link);
                                base.AttachList.Add(attach);
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #5
0
        /// <summary>
        /// Crawls the paged tender-invitation list at SiteUrl. For each data row it reads the
        /// list columns, downloads the detail page, builds an InviteInfo and saves it directly
        /// through ToolDb.SaveEntity; when the save reports true, attachment links found on the
        /// detail page are downloaded and saved as well.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as MaxCount rows have been submitted for saving.
        /// </param>
        /// <returns>
        /// The local result list. Nothing is ever added to it here because entities are persisted
        /// inside this method, so it is always empty (and never null).
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list     = new ArrayList();
            int   sqlCount = 0;
            // Total number of list pages; stays 1 when the pager text cannot be parsed.
            int    pageInt         = 1;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
                return(list);
            }
            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));

            if (sNode != null && sNode.Count > 0)
            {
                string pageString = sNode.AsString();
                Regex  regexPage  = new Regex(@",共[^页]+页,");
                Match  pageMatch  = regexPage.Match(pageString);
                int    parsedPages;
                // TryParse instead of a swallowed exception: a non-numeric pager keeps the default of 1.
                if (int.TryParse(pageMatch.Value.Replace(",共", "").Replace("页,", "").Trim(), out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }

            // Loop-invariant patterns, built once instead of once per row.
            Regex regPrjAdd  = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
            Regex regInvType = new Regex(@"[^\r\n]+[\r\n]{1}");
            // Each pattern keeps only the text that precedes its marker phrase.
            Dictionary <string, Regex> dicRegex = new Dictionary <string, Regex>();
            dicRegex.Add("重要提示", new Regex(@"([.\S\s]*)(?=重要提示)"));
            dicRegex.Add("温馨提示", new Regex(@"([.\S\s]*)(?=温馨提示)"));

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the grid's paging event
                    // together with the hidden ASP.NET fields of the previously loaded page.
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "ctl00$hdnPageCount" }, new string[] { "ctl00$Content$GridView1", "Page$" + i.ToString(), viewState, "", eventValidation, pageInt.ToString() });
                    html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 and the last row are skipped (presumably the header and pager rows -
                // confirm against the page markup).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    // Checked before any download so that no detail page is fetched for a row
                    // that would be discarded anyway.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(list);
                    }

                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty,
                           inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty,
                           endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, HtmlTxt = string.Empty;
                    TableRow tr = table.Rows[j];
                    code      = tr.Columns[1].ToPlainTextString().Trim();
                    prjName   = tr.Columns[2].ToPlainTextString().Trim();
                    buildUnit = tr.Columns[3].ToPlainTextString().Trim();
                    beginDate = tr.Columns[5].ToPlainTextString().Trim();
                    endDate   = tr.Columns[6].ToPlainTextString().Trim();
                    ATag aTag = tr.Columns[2].Children[0] as ATag;
                    if (aTag == null)
                    {
                        // Without a link there is no detail page to read; skip the row instead of
                        // aborting the whole crawl with a NullReferenceException.
                        Logger.Error("No detail link found for row: " + prjName);
                        continue;
                    }
                    InfoUrl = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;
                    string htmldetail = string.Empty;
                    try
                    {
                        // Download the detail page once and derive both representations from it.
                        string rawDetail = ToolHtml.GetHtmlByUrlEncode(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");
                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
                        // HtmlTxt keeps the original markup of the content span.
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        // The text version gets line breaks in place of <br> tags so the
                        // line-based regexes below can work.
                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex.ToString());
                        continue;
                    }
                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));

                    inviteCtx  = dtnode.AsString().Replace(" ", "");
                    prjAddress = regPrjAdd.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
                    msgType    = "深圳市建设工程交易中心";
                    specType   = "建设工程";
                    // The invitation type is derived from the first line of the content.
                    string InvType = regInvType.Match(inviteCtx).Value;

                    inviteType = ToolHtml.GetInviteTypes(InvType);
                    // Drop everything from a trailing notice marker onwards.
                    foreach (string dicValue in dicRegex.Keys)
                    {
                        if (inviteCtx.Contains(dicValue))
                        {
                            inviteCtx = dicRegex[dicValue].Match(inviteCtx).Value;
                        }
                    }
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳市工程", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, InfoUrl, HtmlTxt);
                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        continue;
                    }

                    // Attachments are only collected for entities whose save reported true.
                    dtlparser.Reset();
                    NodeList dlNodes = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (dlNodes == null || dlNodes.Count == 0)
                    {
                        continue;
                    }

                    for (int f = 0; f < dlNodes.Count; f++)
                    {
                        ATag fileTag = dlNodes[f] as ATag;
                        if (fileTag == null || !fileTag.IsAtagAttach())
                        {
                            continue;
                        }

                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttach(fileTag.Link.Replace("..", "http://www.szjsjy.com.cn"), fileTag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best effort: one failed attachment must not stop the crawl,
                            // but the failure is recorded.
                            Logger.Error(ex.ToString());
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #6
0
        /// <summary>
        /// Crawls the list pages addressed as SiteUrl followed by the page number. Every list
        /// entry is turned into an InviteInfo built from its link, the date found in the entry
        /// text and the body of its detail page, and is appended to the returned list.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as the list holds MaxCount entries.
        /// </param>
        /// <returns>The collected InviteInfo objects; empty when the first page cannot be loaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results    = new List<InviteInfo>();
            int    totalPages = 1;
            string pageHtml   = string.Empty;

            // The first page is mandatory: without it there is nothing to crawl.
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "1", Encoding.UTF8);
            }
            catch
            {
                return results;
            }

            // Read the page count from the paging div; any failure leaves it at 1.
            Parser   listParser  = new Parser(new Lexer(pageHtml));
            NodeList pagingNodes = listParser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "paging"), new TagNameFilter("div")));
            if (pagingNodes != null && pagingNodes.Count > 0)
            {
                string pagingText = pagingNodes[0].ToNodePlainString();
                try
                {
                    pagingText = pagingText.GetRegexBegEnd("/", "转到");
                    totalPages = int.Parse(pagingText);
                }
                catch { }
            }

            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                // Page 1 is already loaded; later pages are fetched on demand and skipped on failure.
                if (pageNo != 1)
                {
                    try
                    {
                        pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl + pageNo, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                listParser = new Parser(new Lexer(pageHtml));
                NodeList items = listParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasAttributeFilter("class", "column-info-list"), new TagNameFilter("div")), true), new TagNameFilter("li")));
                if (items == null || items.Count == 0)
                {
                    continue;
                }

                for (int idx = 0; idx < items.Count; idx++)
                {
                    // Values taken from the list entry itself.
                    ATag   link       = items[idx].GetATag();
                    string prjName    = link.LinkText.ToNodeString();
                    string InfoUrl    = "http://ggzy.zhaoqing.gov.cn" + link.Link;
                    string beginDate  = items[idx].ToPlainTextString().GetDateRegex();

                    string detailHtml;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    Parser   detailParser = new Parser(new Lexer(detailHtml));
                    NodeList bodyNodes    = detailParser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    if (bodyNodes == null || bodyNodes.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = bodyNodes.AsHtml();
                    string inviteCtx = HtmlTxt.ToCtxString();

                    // Cut the unit name right after the first "中心" when that word is present.
                    string buildUnit = inviteCtx.GetBuildRegex();
                    if (buildUnit.Contains("中心"))
                    {
                        int centerPos = buildUnit.IndexOf("中心");
                        buildUnit = buildUnit.Substring(0, centerPos) + "中心";
                    }

                    string prjAddress = inviteCtx.GetAddressRegex();
                    string code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    string inviteType = ToolHtml.GetInviteTypes(prjName);

                    // End date, remark and other-type are not extracted for this site and stay empty.
                    InviteInfo info = ToolDb.GenInviteInfo(
                        "广东省", "肇庆市区", "", string.Empty,
                        code, prjName, prjAddress, buildUnit,
                        beginDate, string.Empty, inviteCtx, string.Empty,
                        "肇庆市公共资源交易中心", inviteType, "建设工程", string.Empty,
                        InfoUrl, HtmlTxt);
                    results.Add(info);

                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return results;
                    }
                }
            }

            return results;
        }
Beispiel #7
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8, ref cookiestr);
            }
            catch (Exception ex)
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("script"), new HasAttributeFilter("type", "text/javascript")));
            string   b        = pageNode.AsString().GetCtxBr();
            string   c        = b.Replace("('", "徐鑫").Replace("')", "凯德");

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = c.GetRegexBegEnd("徐鑫", "凯德");
                    page = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= page; i++)
            {
                if (i >= 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] { "fcInfotitle",
                                       "currentPage" },
                        new string[] {
                        "",
                        i.ToString()
                    }
                        );
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl("https://www.dgzb.com.cn/ggzy/website/WebPagesManagement/findListByPage?fcInfotype=1&tenderkind=A&projecttendersite=SS&orderFiled=fcInfoenddate&orderValue=desc", nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }
                JavaScriptSerializer        serializer  = new JavaScriptSerializer();
                Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(htl);
                foreach (KeyValuePair <string, object> obj in smsTypeJson)
                {
                    object[] array = (object[])obj.Value;

                    foreach (object arrValue in array)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                        code      = Convert.ToString(dic["fcTendersn"]);
                        prjName   = Convert.ToString(dic["fcInfotitle"]);
                        beginDate = Convert.ToString(dic["fcInfostartdate"]).GetDateRegex("yyyy-MM-dd");

                        string xu = Convert.ToString(dic["id"]);
                        InfoUrl = "https://www.dgzb.com.cn/ggzy/website/WebPagesManagement/jsdetail?publishId=" + xu + "&fcInfotype=1";
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8).Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail")));
                        if (dtnode.Count > 0 && dtnode != null)
                        {
                            HtmlTxt = dtnode.AsHtml();

                            inviteCtx = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();

                            prjAddress = inviteCtx.GetRegexBegEnd("工程地址:", "\r");
                            buildUnit  = inviteCtx.GetRegexBegEnd("建设单位:", "\r");

                            msgType  = "东莞市建设工程交易中心";
                            specType = "建设工程";
                            Regex regoType = new Regex(@"工程类型(:|:)[^\r\n]+\r\n");
                            otherType = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                            inviteCtx = inviteCtx.Replace("ctl00_cph_context_span_MetContent", "").Replace("<span id=", "").Replace("</span>", "").Replace(">", "").Trim();
                            if (buildUnit == "")
                            {
                                buildUnit = "见招标信息";
                            }
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "",
                                                                   string.Empty, code, prjName, prjAddress, buildUnit,
                                                                   beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);//附件搜索
                            parserdetail.Reset();
                            parser = new Parser(new Lexer(HtmlTxt));
                            NodeList aTagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (aTagNode != null && aTagNode.Count > 0)
                            {
                                for (int k = 0; k < aTagNode.Count; k++)
                                {
                                    ATag aTag = aTagNode[k].GetATag();
                                    if (aTag.IsAtagAttach())
                                    {
                                        string linkurl = aTag.Link;
                                        linkurl = linkurl.Replace("&amp;", "&");
                                        string cc = string.Empty;
                                        string aa = linkurl.GetRegexBegEnd("&", "id");
                                        if (aa == "")
                                        {
                                            cc = linkurl;
                                        }
                                        else
                                        {
                                            cc = linkurl.Replace(aa, "");
                                        }
                                        BaseAttach attach = ToolDb.GenBaseAttach(aTag.LinkText, info.Id, cc);
                                        base.AttachList.Add(attach);
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }


                //for (int i = 1; i < page; i++)
                //{
                //    if (i > 1)
                //    {
                //        viewState = this.ToolWebSite.GetAspNetViewState(htl);
                //        eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                //        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[]{
                //            "__EVENTTARGET",
                //            "__EVENTARGUMENT",
                //            "__LASTFOCUS",
                //            "__VIEWSTATE",
                //            "__EVENTVALIDATION",
                //            "ctl00$cph_context$drp_selSeach",
                //            "ctl00$cph_context$txt_strWhere",
                //            "ctl00$cph_context$drp_Rq",
                //            "ctl00$cph_context$GridViewPaingTwo1$txtGridViewPagingForwardTo",
                //            "ctl00$cph_context$GridViewPaingTwo1$btnNext.x",
                //            "ctl00$cph_context$GridViewPaingTwo1$btnNext.y"
                //        }, new string[]{
                //            string.Empty,
                //            string.Empty,
                //            string.Empty,
                //            viewState,
                //            eventValidation,
                //            "1",
                //            string.Empty,
                //            "3",
                //            (i-1).ToString(),
                //            "8",
                //            "10"
                //        });
                //        try
                //        {
                //            htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                //        }
                //        catch (Exception ex) { continue; }
                //    }
                //    parser = new Parser(new Lexer(htl));
                //    NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_GridView1")));
                //    if (tableNodeList != null && tableNodeList.Count > 0)
                //    {
                //        TableTag table = (TableTag)tableNodeList[0];
                //        for (int j = 1; j < table.RowCount; j++)
                //        {
                //            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                //                prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                //                specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                //                remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                //                CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                //            TableRow tr = table.Rows[j];
                //            code = tr.Columns[1].ToPlainTextString().Trim();
                //            prjName = tr.Columns[2].ToPlainTextString().Trim();
                //            beginDate = tr.Columns[4].ToPlainTextString().Trim().GetReplace(" - ", "&").Split('&')[0].Trim();
                //            try
                //            {
                //                endDate = tr.Columns[4].ToPlainTextString().Trim().GetReplace(" - ", "&").Split('&')[1].Trim();
                //            }
                //            catch { }
                //            ATag aTag = tr.Columns[2].SearchFor(typeof(ATag), true)[0] as ATag;
                //            InfoUrl = "http://www.dgzb.com.cn:8080/dgjyweb/sitemanage/" + aTag.Link.Replace("amp;", "").Trim();
                //            string htmldetail = string.Empty;
                //            try
                //            {
                //                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8).Replace("&nbsp;", "");
                //            }
                //            catch (Exception)
                //            {
                //                continue;
                //            }
                //            Parser parserdetail = new Parser(new Lexer(htmldetail));
                //            NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_span_MetContent")));
                //            if (dtnode.Count > 0 && dtnode != null)
                //            {
                //                HtmlTxt = dtnode.AsHtml();
                //                inviteCtx = dtnode.ToHtml().Replace("<br/>", "\r\n");
                //                Regex regBuidUnit = new Regex(@"建设单位:[^\r\n]+\r\n");
                //                buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("建设单位:", "").Replace(":", "").Trim();
                //                Regex regPrjAddr = new Regex(@"(工程地点|工程地址)(:|:)[^\r\n]+\r\n");
                //                prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址", "").Replace(":", "").Trim();
                //                msgType = "东莞市建设工程交易中心";
                //                specType = "建设工程";
                //                Regex regoType = new Regex(@"工程类型(:|:)[^\r\n]+\r\n");
                //                otherType = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                //                inviteCtx = inviteCtx.Replace("ctl00_cph_context_span_MetContent", "").Replace("<span id=", "").Replace("</span>", "").Replace(">", "").Trim();
                //                if (buildUnit == "")
                //                {
                //                    buildUnit = "见招标信息";
                //                }
                //                inviteType = ToolHtml.GetInviteTypes(prjName);
                //                InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "",
                //                    string.Empty, code, prjName, prjAddress, buildUnit,
                //                    beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                //                list.Add(info);//附件搜索
                //                parserdetail.Reset();
                //                NodeList fileNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_DownLoadFiles1_GridView2")));
                //                if (fileNode != null && fileNode.Count > 0)
                //                {
                //                    string iii = fileNode.AsString().Trim();
                //                    TableTag tablefile = (TableTag)fileNode[0];
                //                    for (int k = 1; k < tablefile.RowCount; k++)
                //                    {
                //                        string fileName = string.Empty, fileUrl = string.Empty;
                //                        TableRow trfile = tablefile.Rows[k];
                //                        if (trfile.Columns[1].ToPlainTextString().Trim() != "")
                //                        {
                //                            ATag aTagfile = trfile.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                //                            fileName = trfile.Columns[1].ToPlainTextString().Trim();
                //                            fileUrl = "http://www.dgzb.com.cn/dgjyweb/sitemanage/" + aTagfile.Link.Replace("amp;", "").Trim();
                //                            BaseAttach attach = ToolDb.GenBaseAttach(fileName, info.Id, fileUrl);
                //                            base.AttachList.Add(attach);
                //                        }
                //                    }
                //                }
                //                parserdetail.Reset();//补充文件搜索
                //                NodeList fileBuChongNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_BuChongFileDown1_GridView2")));
                //                if (fileBuChongNode != null && fileBuChongNode.Count > 0)
                //                {
                //                    string iii = fileBuChongNode.AsString().Trim();
                //                    TableTag tableBuChongfile = (TableTag)fileBuChongNode[0];
                //                    for (int k = 1; k < tableBuChongfile.RowCount; k++)
                //                    {
                //                        string fileName = string.Empty, fileUrl = string.Empty;
                //                        TableRow trfileBuChong = tableBuChongfile.Rows[k];
                //                        if (trfileBuChong.Columns[1].ToPlainTextString().Trim() != "")
                //                        {
                //                            ATag aTagfile = trfileBuChong.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                //                            fileName = trfileBuChong.Columns[1].ToPlainTextString().Trim();
                //                            fileUrl = "http://www.dgzb.com.cn/dgjyweb/sitemanage/" + aTagfile.Link.Replace("amp;", "").Trim();
                //                            BaseAttach attach = ToolDb.GenBaseAttach(fileName, info.Id, fileUrl);
                //                            base.AttachList.Add(attach);
                //                        }
                //                    }
                //                }
                //                if (!crawlAll && list.Count >= this.MaxCount) return list;
                //            }
                //        }
                //    }
            }
            return(null);
        }
Beispiel #8
0
        /// <summary>
        /// Crawls the tender announcements listed under <c>SiteUrl</c> (yantian.gov.cn),
        /// opens each announcement's detail page and converts it into an
        /// <c>InviteInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to walk every listing page; <c>false</c> to stop as soon as
        /// <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records gathered so far. The list is empty when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8, ref cookiestr);
            }
            catch (Exception)
            {
                return list;
            }

            Parser   parser        = new Parser(new Lexer(htl));
            NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "right")));

            if (tableNodeList != null && tableNodeList.Count > 0)
            {
                // The pager is expected to contain "共N页". When the marker is missing or
                // malformed we keep the single-page default instead of letting the parse
                // throw and abort the whole crawl.
                Regex regexPage = new Regex(@"共\d+页");
                int   parsedPage;
                if (int.TryParse(regexPage.Match(tableNodeList.AsString()).Value.Trim(new char[] { '共', '页' }), out parsedPage))
                {
                    page = parsedPage;
                }
            }

            // These patterns do not depend on the item being processed, so build them once.
            Regex regexHtml   = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regCode     = new Regex(@"(项目序号|招标编号)(:|:)[^\r\n]+\r\n");
            Regex regBuidUnit = new Regex(@"(招标人|建设单位)(:|:)[^\r\n]+\r\n");

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl("http://www.yantian.gov.cn/cn/zwgk/zfcg/zbgg/index_" + (i - 1).ToString() + ".shtml", Encoding.UTF8);
                    }
                    catch (Exception)
                    {
                        // A listing page that cannot be fetched is skipped.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(htl));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "565")), true), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                // Link of the previously accepted table; consecutive tables that resolve to
                // the same link are treated as duplicates of one entry.
                string url = string.Empty;
                for (int j = 0; j < nodeList.Count; j++)
                {
                    string beg = nodeList[j].ToPlainTextString().GetDateRegex();
                    if (string.IsNullOrEmpty(beg))
                    {
                        continue;
                    }

                    string href = nodeList[j].GetATagHref();
                    if (j > 0 && href == url)
                    {
                        continue;
                    }
                    url = href;

                    TableTag table = nodeList[j] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }

                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    string prjName   = table.GetATagValue("title").Replace("&#41;", ")").Replace("&#40;", "(");
                    string InfoUrl   = "http://www.yantian.gov.cn" + table.GetATagValue();
                    string beginDate = beg;
                    if (prjName.Contains("["))
                    {
                        // Drop everything from the first "[" onwards.
                        prjName = prjName.Remove(prjName.IndexOf("["));
                    }

                    string htmldetail;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "").Replace("<br />", "\r\n").Trim();
                    }
                    catch
                    {
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtnode.AsHtml();
                    string inviteCtx = dtnode.AsString().Replace(" ", "").Trim();
                    inviteCtx = regexHtml.Replace(inviteCtx, "");

                    string code = regCode.Match(inviteCtx).Value.Replace("招标编号:", "").Replace("项目序号:", "").Trim();
                    if (Encoding.Default.GetByteCount(code) > 50)
                    {
                        // Over-long matches are discarded rather than stored.
                        code = "";
                    }

                    string msgType  = "深圳市盐田区政府采购中心";
                    string specType = "建设工程";

                    string buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("招标人:", "").Replace("建设单位:", "").Trim();
                    if (Encoding.Default.GetByteCount(buildUnit) > 150)
                    {
                        buildUnit = "";
                    }

                    // No address is extracted from the page in this method, so the
                    // placeholder is always used.
                    string prjAddress = "见招标信息";

                    inviteCtx = inviteCtx.Replace("<ahref=", "").Replace("/service/", "").Replace("</a>", "").Replace("您是第", "").Replace("位访问者粤ICP备06000803号", "").Replace(">", "").Trim();
                    prjName   = prjName.Replace("&ldquo;", "").Replace("&rdquo;", "").Trim();
                    string inviteType = ToolHtml.GetInviteTypes(prjName);

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳区及街道工程", "",
                                                           string.Empty, code, prjName, prjAddress, buildUnit,
                                                           beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Beispiel #9
0
        /// <summary>
        /// Crawls the award notices listed under <c>SiteUrl</c>, opens each notice's detail
        /// page, extracts the bid data through the project's text helpers and builds one
        /// <c>BidInfo</c> per notice. Attachment links found in the notice body are added
        /// to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to walk every listing page; <c>false</c> to stop as soon as
        /// <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records gathered so far. The list is empty when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<BidInfo>();
            // Page count; stays at 1 when the pager cannot be read.
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl);
            }
            catch
            {
                return list;
            }

            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagePanel")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                string pageTemp = tdNodes.AsString().Replace("&nbsp;", "").Replace(" ", "").Trim();
                try
                {
                    int parsedPage;
                    if (int.TryParse(pageTemp.GetRegexBegEnd("总", "页"), out parsedPage))
                    {
                        pageInt = parsedPage;
                    }
                }
                catch (Exception)
                {
                    // The extraction helper failed; fall back to a single page.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + i);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ST12")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    string endDate = string.Empty, otherType = string.Empty;

                    // A list item without a link, or a link without a title, cannot be turned
                    // into a record; skip it instead of failing the whole crawl.
                    ATag aTag = nodeList[j].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string prjName = aTag.GetAttribute("title");
                    if (prjName == null || prjName.Contains("声明"))
                    {
                        continue;
                    }

                    string beginDate = nodeList[j].ToPlainTextString().GetDateRegex();
                    string InfoUrl   = aTag.Link;

                    string htmldetail;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "content"), new TagNameFilter("div")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtnode.AsHtml().GetReplace("<!--[if !supportLists]-->,<!--[endif]-->");
                    string bidCtx  = HtmlTxt.ToCtxString();

                    string code      = bidCtx.GetCodeRegex().GetCodeDel();
                    string buildUnit = bidCtx.GetBuildRegex();
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = bidCtx.GetRegex("采购单位,招标代理");
                    }

                    string bidUnit = bidCtx.GetBidRegex();
                    if (bidUnit.Contains("公司"))
                    {
                        // Cut the text right after the first "公司".
                        bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                    }

                    string bidMoney = bidCtx.GetMoneyRegex();
                    if (bidMoney == "0" || string.IsNullOrWhiteSpace(bidMoney))
                    {
                        bidMoney = bidCtx.GetMoneyRegex(new string[] { "中标金额" }, false, "万元");
                    }
                    string prjMgr = bidCtx.GetMgrRegex();

                    // Amounts of 100000 or more are divided by 10000; text that is not a
                    // number is left untouched.
                    decimal money;
                    if (decimal.TryParse(bidMoney, out money) && money >= 100000)
                    {
                        bidMoney = (money / 10000).ToString();
                    }

                    string specType = "政府采购";
                    string msgType  = "中国远东国际招标公司";
                    string bidType  = ToolHtml.GetInviteTypes(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Register every attachment-like link found in the notice body.
                    dtlparser = new Parser(new Lexer(HtmlTxt));
                    NodeList FileTag = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (FileTag != null && FileTag.Count > 0)
                    {
                        for (int f = 0; f < FileTag.Count; f++)
                        {
                            ATag file = FileTag[f] as ATag;
                            if (file == null || !file.IsAtagAttach())
                            {
                                continue;
                            }

                            // Links that do not mention "http" are resolved against the site root.
                            string link = file.Link.ToLower().Contains("http")
                                ? file.Link
                                : "http://www.cfet.com.cn/" + file.Link;

                            BaseAttach attach = ToolDb.GenBaseAttach(file.ToPlainTextString(), info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Beispiel #10
0
        /// <summary>
        /// Crawls the award results listed under <c>SiteUrl</c> (ztb.gaoming.gov.cn), opens
        /// each result's detail page, reads the key/value table inside the
        /// <c>con_10 tl</c> container and builds one <c>BidInfo</c> per list entry.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to walk every listing page; <c>false</c> to stop as soon as
        /// <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records gathered so far. The list is empty when the first listing page
        /// cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            // Page count; stays at 1 when the pager script cannot be read.
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default).Replace("&nbsp;", "");
            }
            catch (Exception)
            {
                return list;
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ny_21 tc")));

            if (sNode != null && sNode.Count > 0)
            {
                // The first argument of the createPageHTML(...) call is used as the page count.
                string pageString = sNode.AsString().Trim();
                Regex  regexPage  = new Regex(@"createPageHTML\([^\)]+\)");
                Match  pageMatch  = regexPage.Match(pageString);
                int    parsedPage;
                if (int.TryParse(pageMatch.Value.Replace("createPageHTML(", "").Replace(")", "").Split(',')[0].Trim(), out parsedPage))
                {
                    pageInt = parsedPage;
                }
            }

            // These patterns do not depend on the item being processed, so build them once.
            Regex regCtx      = new Regex(@"[\n]+");
            Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
            Regex regmoneyctx = new Regex(@"[0-9]+[\%]");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "index_" + (i - 1).ToString() + ".html", Encoding.Default);
                    }
                    catch (Exception)
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "ny_22"))), new TagNameFilter("li")));
                if (sNode == null || sNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < sNode.Count; j++)
                {
                    string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                           code = string.Empty, endDate = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    // Each entry needs one link and a second <div>, which is read as the date;
                    // entries lacking either are skipped instead of failing the whole crawl.
                    INode node = sNode[j];
                    if (node.Children == null)
                    {
                        continue;
                    }
                    NodeList anchors = node.Children.SearchFor(typeof(ATag), true);
                    NodeList divs    = node.Children.SearchFor(typeof(Div), true);
                    if (anchors == null || anchors.Count < 1 || divs == null || divs.Count < 2)
                    {
                        continue;
                    }
                    ATag aTag   = anchors[0] as ATag;
                    Div  divTag = divs[1] as Div;
                    if (aTag == null || divTag == null)
                    {
                        continue;
                    }

                    string prjName   = aTag.ToPlainTextString().Trim();
                    string beginDate = divTag.ToPlainTextString().Trim(new char[] { '[', ']', ' ' });
                    string InfoUrl   = aTag.Link.Replace("./", "http://ztb.gaoming.gov.cn/jsgc/zbjg/");

                    string htmldetail;
                    try
                    {
                        // Download the detail page once. The stored HTML comes from the
                        // untouched markup, while the text extraction below works on a copy
                        // whose line-break tags were turned into CRLF.
                        string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "");
                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new HasParentFilter(new AndFilter(new HasAttributeFilter("class", "con_10 tl"), new TagNameFilter("div"))));
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n").Replace("<br/>", "\r\n");
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new HasParentFilter(new AndFilter(new HasAttributeFilter("class", "con_10 tl"), new TagNameFilter("div"))));

                    if (dtnode != null && dtnode.Count > 0)
                    {
                        bidCtx = regCtx.Replace(dtnode.AsString().Replace(" ", "").Trim(), "\r\n");

                        NodeList tables = dtnode.SearchFor(typeof(TableTag), true);
                        TableTag table  = (tables != null && tables.Count > 0) ? tables[0] as TableTag : null;
                        int      rows   = table != null ? table.RowCount : 0;
                        for (int dl = 0; dl < rows; dl++)
                        {
                            TableRow tr = table.Rows[dl];
                            if (tr.ColumnCount < 2)
                            {
                                // A label/value pair needs two cells.
                                continue;
                            }

                            string label = tr.Columns[0].ToPlainTextString();
                            if (label.Contains("编号"))
                            {
                                code = tr.Columns[1].ToPlainTextString().Trim();
                            }
                            else if (label.Contains("招标单位"))
                            {
                                buildUnit = tr.Columns[1].ToPlainTextString().Trim();
                            }
                            else if (label.Contains("中标单位"))
                            {
                                bidUnit = tr.Columns[1].ToPlainTextString().Trim();
                            }
                            else if (label.Contains("建造师") || label.Contains("负责人") || label.Contains("法定代表人"))
                            {
                                prjMgr = tr.Columns[1].ToPlainTextString().Replace(" ", "").Trim();
                            }
                            else if (label.Contains("中标价"))
                            {
                                // Remove percentage figures first so they are not mistaken for the price.
                                string moneyCell   = tr.Columns[1].ToPlainTextString();
                                string bidMoneyctx = regmoneyctx.Replace(moneyCell, "");
                                if (string.IsNullOrEmpty(bidMoneyctx))
                                {
                                    continue;
                                }

                                if (moneyCell.Contains("万元"))
                                {
                                    bidMoney = regBidMoney.Match(bidMoneyctx).Value;
                                }
                                else
                                {
                                    // Without the "万元" unit the figure is divided by 10000;
                                    // results below 0.1 and unparsable text are stored as "0".
                                    decimal amount;
                                    if (decimal.TryParse(regBidMoney.Match(bidMoneyctx).Value, out amount))
                                    {
                                        decimal scaled = amount / 10000;
                                        bidMoney = scaled < 0.1m ? "0" : scaled.ToString();
                                    }
                                    else
                                    {
                                        bidMoney = "0";
                                    }
                                }
                            }
                        }
                    }

                    if (Encoding.Default.GetByteCount(bidUnit) > 150 && bidUnit.Contains("第二标段"))
                    {
                        // Over-long multi-section winner text: keep only the first line.
                        int lineBreak = bidUnit.IndexOf('\n');
                        if (lineBreak >= 0)
                        {
                            bidUnit = bidUnit.Remove(lineBreak).Replace("第一标段", "").Replace(":", "").Replace(":", "");
                        }
                    }

                    string msgType  = "佛山市高明区建设工程交易中心";
                    string specType = "建设工程";
                    prjName = ToolDb.GetPrjName(prjName);
                    string bidType = ToolHtml.GetInviteTypes(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "佛山市区", "高明区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Beispiel #11
0
        /// <summary>
        /// Requests the announcement list from the szairport.com JSON endpoint, opens each
        /// announcement's detail page and builds one <c>InviteInfo</c> per announcement
        /// whose <c>newsBox</c> container is found.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, processing stops as soon as <c>MaxCount</c> records have been
        /// collected. Only one request with <c>limit = MaxCount</c> is sent either way;
        /// no further paging is performed.
        /// </param>
        /// <returns>
        /// The records gathered so far. The list is empty when the listing request fails
        /// or its response does not have the expected <c>recordData.records</c> shape.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string url       = "https://zhaobiao.szairport.com/SZWI/portal/homeInformListJson.do";
            IList  list      = new ArrayList();
            string html      = string.Empty;
            string cookieStr = string.Empty;

            try
            {
                string post = string.Format("start={0}&limit={1}", 0, this.MaxCount);
                html = ToolHtml.GetHtmlByUrlPost(url, post, Encoding.UTF8, ref cookieStr);
            }
            catch (Exception)
            {
                // Without the listing there is nothing to crawl.
                return list;
            }

            Dictionary<string, object> smsTypeJson;
            try
            {
                JavaScriptSerializer serializer = new JavaScriptSerializer();
                smsTypeJson = serializer.DeserializeObject(html) as Dictionary<string, object>;
            }
            catch (ArgumentException)
            {
                // The response is not valid JSON (or is null).
                return list;
            }
            catch (InvalidOperationException)
            {
                return list;
            }

            object recordData;
            if (smsTypeJson == null || !smsTypeJson.TryGetValue("recordData", out recordData))
            {
                return list;
            }

            Dictionary<string, object> tempDic = recordData as Dictionary<string, object>;
            if (tempDic == null)
            {
                return list;
            }

            object recordsValue;
            tempDic.TryGetValue("records", out recordsValue);
            object[] objList = recordsValue as object[];
            if (objList == null)
            {
                return list;
            }

            foreach (object obj in objList)
            {
                Dictionary<string, object> dic = obj as Dictionary<string, object>;
                if (dic == null)
                {
                    continue;
                }

                string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                // Missing keys yield empty strings, except "seqNo": without it no detail
                // URL can be built, so the record is skipped.
                object fieldValue;
                string prjName   = dic.TryGetValue("title", out fieldValue) ? Convert.ToString(fieldValue) : string.Empty;
                string beginDate = dic.TryGetValue("releaseTimeStr", out fieldValue) ? Convert.ToString(fieldValue) : string.Empty;
                if (!dic.TryGetValue("seqNo", out fieldValue))
                {
                    continue;
                }
                string seqNo   = Convert.ToString(fieldValue);
                string InfoUrl = "http://zhaobiao.szairport.com/SZWI/portal/homeInformView.do?seqNo=" + seqNo;

                string htmldtl;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                }
                catch
                {
                    continue;
                }

                Parser   parser  = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "newsBox")));
                if (dtlNode == null || dtlNode.Count == 0)
                {
                    continue;
                }

                string HtmlTxt   = dtlNode.AsHtml();
                string inviteCtx = HtmlTxt.ToCtxString();

                string buildUnit  = inviteCtx.GetBuildRegex();
                string prjAddress = inviteCtx.GetAddressRegex();
                string code       = inviteCtx.GetCodeRegex().GetCodeDel();
                if (code.Contains("__"))
                {
                    // A code containing "__" is discarded.
                    code = "";
                }

                string specType   = "其他";
                string msgType    = "深圳宝安国际机场";
                string inviteType = ToolHtml.GetInviteTypes(prjName);
                InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);

                parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int a = 0; a < aNode.Count; a++)
                    {
                        ATag aTag = aNode[a] as ATag;
                        if (aTag == null || !aTag.IsAtagAttach())
                        {
                            continue;
                        }

                        // NOTE(review): the attachment URL is resolved here but never stored
                        // or attached to the record - confirm whether attachments should be
                        // registered for this site.
                        string fileUrl = aTag.Link.Contains("http")
                            ? aTag.Link
                            : "http://zhaobiao.szairport.com/" + aTag.Link;
                    }
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }
            return list;
        }
Beispiel #12
0
        /// <summary>
        /// Crawls the paged project list at <c>SiteUrl</c>. For every list row the detail page is downloaded,
        /// its <c>inside_table</c> label/value table is flattened into the invitation text, and one
        /// <c>InviteInfo</c> record is added to the result.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<InviteInfo>();
            int    pageInt = 1;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return list;
            }

            // Page count: taken from the pager text between "项" and "页".
            // NOTE(review): GetReplace presumably strips each comma-separated token - confirm against the helper.
            Parser   pagerParser = new Parser(new Lexer(html));
            NodeList tdNodes     = pagerParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "input-group-addon")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                try
                {
                    string pagerText = tdNodes.AsString();
                    string reTemp    = pagerText.GetRegexBegEnd("共", "项");
                    string pageTemp  = pagerText.GetRegexBegEnd("项", "页").GetReplace("共,项,页," + reTemp + ",,");
                    pageInt = int.Parse(pageTemp);
                }
                catch (Exception)
                {
                    // Best effort: an unreadable pager leaves pageInt at 1, so only the first page is crawled.
                }
            }

            // Built once instead of once per list row.
            Regex regPrjAddr = new Regex(@"工程地址:[^\r\n]+\r\n");
            Regex regoType   = new Regex(@"工程类型:[^\r\n]+\r\n");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page i is requested with pi = i - 1; page 1 was already fetched above.
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?pi=" + (i - 1), Encoding.UTF8);
                    }
                    catch { continue; }
                }

                Parser   listParser = new Parser(new Lexer(html));
                NodeList nodeList   = listParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = (TableTag)nodeList[0];

                // Row 0 is skipped (the loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string code = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty,
                           endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 4)
                    {
                        // Columns 1..3 are read below; a shorter row would throw and abort the whole crawl.
                        continue;
                    }

                    string prjName   = tr.Columns[1].ToPlainTextString().Trim();
                    string buildUnit = tr.Columns[2].ToPlainTextString().Trim();
                    string beginDate = tr.Columns[3].ToPlainTextString().Trim();
                    string InfoUrl   = "http://www.bajsjy.com/" + tr.Columns[1].GetATagHref();

                    string htmldetail;
                    try
                    {
                        // <th> cells are rewritten to <td> so header cells show up as ordinary columns.
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("<th", "<td").Replace("</th>", "</td>").Replace("&nbsp;", "");
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    Parser   parserdetail   = new Parser(new Lexer(htmldetail));
                    NodeList nodeDetailList = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
                    if (nodeDetailList == null || nodeDetailList.Count == 0)
                    {
                        continue;
                    }

                    string   HtmlTxt     = nodeDetailList.AsHtml();
                    TableTag tabledetail = (TableTag)nodeDetailList[0];

                    for (int r = 0; r < tabledetail.RowCount; r++)
                    {
                        TableRow trdetail = tabledetail.Rows[r];

                        // Cells are consumed as (label, value) pairs. The bound "c + 1 < ColumnCount" keeps the
                        // value index in range: a trailing unpaired cell in a row with an odd number of cells
                        // is skipped instead of indexing past the end of the row.
                        for (int c = 0; c + 1 < trdetail.ColumnCount; c += 2)
                        {
                            string tr1 = trdetail.Columns[c].ToPlainTextString().Trim();
                            string tr2 = trdetail.Columns[c + 1].ToPlainTextString().Trim();

                            NodeList inptList = trdetail.Columns[c + 1].SearchFor(typeof(InputTag), true);
                            NodeList selList  = trdetail.Columns[c + 1].SearchFor(typeof(SelectTag), true);

                            if (inptList != null && inptList.Count > 0)
                            {
                                if (inptList.Count > 1)
                                {
                                    // Several inputs: use the value of the one marked checked (the last one
                                    // wins); if none is checked the cell's plain text is kept.
                                    for (int inp = 0; inp < inptList.Count; inp++)
                                    {
                                        InputTag inputTage = (InputTag)inptList[inp];
                                        if (inputTage.GetAttribute("checked") == "checked")
                                        {
                                            tr2 = inputTage.GetAttribute("value");
                                        }
                                    }
                                }
                                else
                                {
                                    InputTag inputTage = (InputTag)inptList[0];
                                    tr2 = inputTage.GetAttribute("value");
                                }
                            }

                            if (selList != null && selList.Count > 0)
                            {
                                // A <select> overrides the value with the text of its selected option(s).
                                SelectTag selTag = (SelectTag)selList[0];
                                NodeList  opList = new NodeList();
                                selTag.CollectInto(opList, new HasAttributeFilter("selected", "selected"));
                                tr2 = opList.AsString();
                            }

                            inviteCtx += tr1 + ":" + tr2 + "\r\n";
                        }
                    }

                    prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地址:", "").Trim();
                    string oType = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();

                    if (oType.Contains("房建"))
                    {
                        otherType = "房建及工业民用建筑";
                    }
                    else if (oType.Contains("市政"))
                    {
                        otherType = "市政工程";
                    }
                    else if (oType.Contains("园林绿化"))
                    {
                        otherType = "园林绿化工程";
                    }
                    else if (oType.Contains("装饰") || oType.Contains("装修"))
                    {
                        otherType = "装饰装修工程";
                    }
                    else if (oType.Contains("电力"))
                    {
                        otherType = "电力工程";
                    }
                    else if (oType.Contains("水利"))
                    {
                        otherType = "水利工程";
                    }
                    // Not part of the else-if chain: a "环保" match overrides whatever was chosen above.
                    if (oType.Contains("环保"))
                    {
                        otherType = "环保工程";
                    }

                    string msgType  = "深圳市建设工程交易中心宝安分中心";
                    string specType = "建设工程";
                    string bidType  = ToolHtml.GetInviteTypes(prjName);

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, bidType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Beispiel #13
0
        /// <summary>
        /// Crawls the paged announcement list at <c>SiteUrl</c>. For every <c>li</c> under <c>ul#listul</c>
        /// the linked detail page is downloaded and the text of its <c>div#content</c> is mined for the
        /// fields of one <c>BidInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new ArrayList();
            int    page = 1;
            string htl;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return list;
            }

            // Page count: the number after the last '=' in the href of the last link inside div#page.
            Parser   pagerParser = new Parser(new Lexer(htl));
            NodeList nodeList    = pagerParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "page")), true), new TagNameFilter("a")));

            if (nodeList != null && nodeList.Count > 0)
            {
                try
                {
                    string temp = nodeList[nodeList.Count - 1].GetATagHref();
                    temp = temp.Remove(0, temp.LastIndexOf('=') + 1);
                    page = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager leaves page at 1, so only the first page is crawled.
                }
            }

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + i.ToString(), Encoding.Default);
                    }
                    catch (Exception) { continue; }
                }

                Parser   listParser    = new Parser(new Lexer(htl));
                NodeList tableNodeList = listParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("id", "listul")), true), new TagNameFilter("li")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < tableNodeList.Count; j++)
                {
                    string endDate = string.Empty, otherType = string.Empty;

                    ATag aTag = tableNodeList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link has no detail page; skip it instead of failing the crawl.
                        continue;
                    }

                    string prjName   = aTag.LinkText;
                    string InfoUrl   = "http://www.czjsw.net" + aTag.Link.Replace("amp;", "").Trim();
                    string beginDate = tableNodeList[j].ToPlainTextString().GetDateRegex();

                    string htmldetail;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "").GetJsString();
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    // NOTE(review): this second pass extracts div#content again from the HTML that was just
                    // extracted with the same filter; it looks redundant - confirm before removing.
                    Parser   contentParser = new Parser(new Lexer(dtnode.AsHtml()));
                    NodeList tableNode     = contentParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                    if (tableNode == null || tableNode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = tableNode.AsHtml();
                    string bidCtx  = HtmlTxt.ToCtxString();

                    string bidUnit = bidCtx.GetRegexBegEnd("第一中标候选人为", ",").Replace(":", "").Replace("“", "").Replace("”", "");
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = bidCtx.GetRegexBegEnd("第一中标候选人", ",").Replace(":", "").Replace("“", "").Replace("”", "");
                    }
                    string bidMoney = bidCtx.GetMoneyRegex().GetMoney();

                    // NOTE(review): the paired Replace(":") calls and the two "(" checks below are textually
                    // identical here; one of each was presumably meant for the full-width character -
                    // confirm against the original file encoding.
                    string prjMgr = bidCtx.GetRegexBegEnd("项目经理", ";").Replace(":", "").Replace(":", "");
                    if (string.IsNullOrEmpty(prjMgr))
                    {
                        prjMgr = bidCtx.GetRegexBegEnd("项目负责人", ",").Replace(":", "").Replace(":", "");
                    }

                    if (prjMgr.Contains(";"))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf(";"));
                    }
                    string code      = bidCtx.GetCodeRegex().GetCodeDel();
                    string buildUnit = bidCtx.GetBuildRegex();
                    if (prjMgr.Contains("("))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("("));
                    }
                    if (prjMgr.Contains("("))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("("));
                    }

                    // A value of 50 bytes or more (in the system default encoding) is discarded.
                    if (Encoding.Default.GetByteCount(prjMgr) >= 50)
                    {
                        prjMgr = "";
                    }

                    string msgType  = "潮州市建设工程交易中心";
                    string specType = "建设工程";
                    // The bid type is derived from the raw title, before GetPrjName rewrites it.
                    string bidType  = ToolHtml.GetInviteTypes(prjName);
                    prjName = ToolDb.GetPrjName(prjName);

                    BidInfo info = ToolDb.GenBidInfo("广东省", "潮州市区", "", string.Empty, code, prjName, buildUnit, beginDate,
                                                     bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                     bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Beispiel #14
0
        /// <summary>
        /// Posts the search form to <c>SiteUrl</c> for the last 90 days, walks every result page and, for each
        /// entry whose fourth <c>span.span_on</c> contains "招标/资审公告", downloads the detail page and adds
        /// one <c>InviteInfo</c> record (plus an attachment when a link is found under
        /// <c>div.detail_content</c>).
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first result page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            DateTime startDate = DateTime.Today;
            DateTime endDates  = startDate.AddDays(-90);
            IList    list      = new ArrayList();
            int      pageInt   = 1;
            string   html;
            string   cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, BuildSearchForm(endDates, startDate, 1), Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Without the first result page there is nothing to crawl.
                return list;
            }

            // Page count: the text between "共" and "页" in the pager spans.
            Parser   pagerParser = new Parser(new Lexer(html));
            NodeList pageNode    = pagerParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "paging")), true), new TagNameFilter("span")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("共", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager leaves pageInt at 1, so only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = BuildSearchForm(endDates, startDate, i);
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }

                Parser   listParser = new Parser(new Lexer(html));
                NodeList listNode   = listParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "publicont")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    Parser   itemParser = new Parser(new Lexer(node.ToHtml()));
                    NodeList txtNode    = itemParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "span_on")));
                    if (txtNode == null || txtNode.Count < 4)
                    {
                        // Spans 0 and 3 are read below; with fewer spans the entry cannot be classified.
                        continue;
                    }

                    string sehu = txtNode[0].ToNodePlainString();
                    string nlse = txtNode[3].ToNodePlainString();
                    if (!nlse.Contains("招标/资审公告"))
                    {
                        continue;
                    }

                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    string prjName   = aTag.GetAttribute("title");
                    string beginDate = node.ToPlainTextString().GetDateRegex();
                    string InfoUrl   = aTag.Link.GetReplace("amp;");

                    string htmlDtl;
                    try
                    {
                        htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                        htmlDtl = ToolHtml.GetRegexHtlTxt(htmlDtl);
                    }
                    catch { continue; }

                    // When the page carries div#div_0101, its link's onclick holds the path of the page that
                    // is actually parsed; that page replaces the one just downloaded.
                    Parser   wrapperParser = new Parser(new Lexer(htmlDtl));
                    NodeList zsList        = wrapperParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "div_0101")));
                    if (zsList != null && zsList.Count > 0)
                    {
                        try
                        {
                            ATag   aTagzs = zsList[0].GetATag();
                            string urlzs  = aTagzs.GetAttribute("onclick");
                            string urls   = urlzs.GetReplace("showdetail(this, '0101','", "").GetReplace("')", "").Replace(",", "").Replace(")", "");
                            urls    = "http://www.ggzy.gov.cn/information" + urls;
                            htmlDtl = this.ToolWebSite.GetHtmlByUrl(urls, Encoding.UTF8);
                            htmlDtl = ToolHtml.GetRegexHtlTxt(htmlDtl);
                        }
                        catch (Exception)
                        {
                            // Skip this entry, like every other failed fetch in this method, rather than
                            // rethrowing and losing the records already collected.
                            continue;
                        }
                    }

                    Parser   dtlParser = new Parser(new Lexer(htmlDtl));
                    NodeList dtlList   = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtlList.AsHtml();
                    string inviteCtx = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();

                    // Best effort: append the link found in p.p_o to the text when there is one.
                    string ctxUrl = string.Empty;
                    try
                    {
                        Parser   parurl = new Parser(new Lexer(HtmlTxt));
                        NodeList zsUrl  = parurl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("p"), new HasAttributeFilter("class", "p_o")));
                        if (zsUrl != null && zsUrl.Count > 0)
                        {
                            ATag aTagurl = zsUrl[0].GetATag();
                            if (aTagurl != null)
                            {
                                ctxUrl = "原文链接地址 : " + aTagurl.Link;
                            }
                        }
                    }
                    catch (Exception)
                    { }
                    inviteCtx = inviteCtx + ctxUrl;

                    string prjAddress = inviteCtx.GetAddressRegex();
                    string buildUnit  = inviteCtx.GetBuildRegex();
                    string code       = inviteCtx.GetCodeRegex();
                    if (string.IsNullOrEmpty(buildUnit))
                    {
                        buildUnit = inviteCtx.GetRegex("招标人");
                    }
                    buildUnit = ToolHtml.GetSubString(buildUnit, 150);

                    if (string.IsNullOrWhiteSpace(code))
                    {
                        // Fallback: flatten the first table into "cell0:cell1" lines and retry the field
                        // extraction on that text for whichever fields are still blank.
                        Parser   tableParser = new Parser(new Lexer(HtmlTxt));
                        NodeList bidNode     = tableParser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        if (bidNode != null && bidNode.Count > 0)
                        {
                            string   ctx      = string.Empty;
                            TableTag bidTable = bidNode[0] as TableTag;
                            try
                            {
                                if (bidTable != null)
                                {
                                    for (int r = 0; r < bidTable.RowCount; r++)
                                    {
                                        TableRow row = bidTable.Rows[r];
                                        if (row.ColumnCount < 2)
                                        {
                                            // A single-cell row must not end the scan of the remaining rows.
                                            continue;
                                        }
                                        ctx += row.Columns[0].ToNodePlainString() + ":";
                                        ctx += row.Columns[1].ToNodePlainString() + "\r\n";
                                    }
                                }
                            }
                            catch { }

                            if (string.IsNullOrWhiteSpace(buildUnit))
                            {
                                buildUnit = ctx.GetBuildRegex();
                            }

                            if (string.IsNullOrWhiteSpace(prjAddress))
                            {
                                prjAddress = ctx.GetAddressRegex();
                            }

                            if (string.IsNullOrWhiteSpace(code))
                            {
                                code = ctx.GetCodeRegex();
                            }
                        }
                    }

                    string msgType  = "国家信息中心";
                    string specType = "建设工程";
                    // The invite type is a fixed value here; it is not derived from the title.
                    string inviteType = "建设工程";
                    string[] provs = GetPrivoce(sehu);

                    InviteInfo info = ToolDb.GenInviteInfo(provs[0], provs[1], "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Best effort: register the first link found under div.detail_content as an attachment.
                    try
                    {
                        Parser   attachParser = new Parser(new Lexer(HtmlTxt));
                        NodeList nodeFm       = attachParser.ExtractAllNodesThatMatch((new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail_content")))));
                        if (nodeFm != null && nodeFm.Count > 0)
                        {
                            ATag aTagzs = nodeFm[0].GetATag();
                            if (aTagzs != null)
                            {
                                BaseAttach attach = ToolDb.GenBaseAttach("内容(点击下载)", info.Id, aTagzs.Link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }
                    catch { }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }

        /// <summary>
        /// Builds the search form posted to <c>SiteUrl</c> for one result page.
        /// </summary>
        /// <param name="rangeStart">Value sent as TIMEBEGIN_SHOW and TIMEBEGIN.</param>
        /// <param name="rangeEnd">Value sent as TIMEEND_SHOW and TIMEEND.</param>
        /// <param name="pageNumber">Value sent as PAGENUMBER.</param>
        /// <returns>The name/value collection produced by <c>ToolWebSite.GetNameValueCollection</c>.</returns>
        private NameValueCollection BuildSearchForm(DateTime rangeStart, DateTime rangeEnd, int pageNumber)
        {
            // NOTE(review): the dates are sent in DateTime.ToString() default (culture-dependent) format -
            // confirm that this is what the site expects.
            return this.ToolWebSite.GetNameValueCollection(new string[] {
                "TIMEBEGIN_SHOW",
                "TIMEEND_SHOW",
                "TIMEBEGIN",
                "TIMEEND",
                "DEAL_TIME",
                "DEAL_CLASSIFY",
                "DEAL_STAGE",
                "DEAL_PROVINCE",
                "DEAL_CITY",
                "DEAL_PLATFORM",
                "DEAL_TRADE",
                "isShowAll",
                "PAGENUMBER",
                "FINDTXT"
            }, new string[] {
                rangeStart.ToString(),
                rangeEnd.ToString(),
                rangeStart.ToString(),
                rangeEnd.ToString(),
                "02",
                "01",
                "0101",
                "0",
                "0",
                "0",
                "0",
                "1",
                pageNumber.ToString(),
                ""
            });
        }
Beispiel #15
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();

            //取得页码
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
            }
            Parser   parser  = new Parser(new Lexer(html));
            int      pageInt = 1;
            NodeList sNodes  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagenumber")), true), new TagNameFilter("a")));

            if (sNodes != null && sNodes.Count > 1)
            {
                string page = sNodes[sNodes.Count - 2].ToPlainTextString();
                try
                {
                    pageInt = int.Parse(page);
                }
                catch { }
            }
            parser.Reset();
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&pos=" + i.ToString(), Encoding.Default);
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex.ToString());
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "recordlist")));
                if (nodes != null && nodes.Count > 0)
                {
                    TableTag table = nodes[0] as TableTag;
                    for (int t = 0; t < table.RowCount; t++)
                    {
                        string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                               code = string.Empty, bidDate = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                               bidCtx = string.Empty, prjAddress = string.Empty, remark = string.Empty, prjMgr = string.Empty,
                               otherType = string.Empty, HtmlTxt = string.Empty, strHtml = string.Empty;
                        TableRow tr = table.Rows[t];
                        endDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                        ATag alink = tr.Columns[0].GetATag();
                        prjName = tr.Columns[0].GetATagValue("title");
                        InfoUrl = "http://www.nmgp.gov.cn" + alink.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "").Trim();
                        }
                        catch (Exception ex)
                        {
                            continue;
                        }

                        Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                        htmldtl = regexHtml.Replace(htmldtl, "");
                        Parser   parserdtl     = new Parser(new Lexer(htmldtl));
                        Parser   dtlparserHTML = new Parser(new Lexer(htmldtl));
                        NodeList nodesDtl      = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "hlcms_9")));
                        if (nodesDtl != null && nodesDtl.Count > 0)
                        {
                            Parser   begDate = new Parser(new Lexer(nodesDtl.ToHtml()));
                            NodeList begNode = begDate.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "yzhang")));
                            if (begNode != null && begNode.Count > 0)
                            {
                                beginDate = begNode.AsString().GetDateRegex("yyyy年MM月dd日");
                            }
                            begDate.Reset();
                            NodeList dtlTable = begDate.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "5")));
                            if (dtlTable != null && dtlTable.Count > 0)
                            {
                                TableTag tableDtl = dtlTable[0] as TableTag;
                                if (tableDtl.RowCount > 2)
                                {
                                    string ctx = tableDtl.Rows[2].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 4)
                                {
                                    string ctx = tableDtl.Rows[4].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 6)
                                {
                                    string ctx = tableDtl.Rows[6].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 8)
                                {
                                    string ctx = tableDtl.Rows[8].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 10)
                                {
                                    string ctx = tableDtl.Rows[10].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 12)
                                {
                                    string ctx = tableDtl.Rows[12].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                            }
                            HtmlTxt = nodesDtl.ToHtml();
                            bidCtx  = HtmlTxt.ToCtxString();

                            code      = bidCtx.GetRegex("批准文件编号,工程编号,项目编号").Replace("无", "");
                            buildUnit = bidCtx.GetBuildRegex();
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = bidCtx.GetRegex("采购代理机构名称,采购单位名称");
                            }
                            prjAddress = bidCtx.GetAddressRegex();
                            if (string.IsNullOrEmpty(prjAddress))
                            {
                                prjAddress = bidCtx.GetRegex("投标地点,开标地点,地址");
                            }
                            if (bidUnit.Contains("废标"))
                            {
                                bidUnit = "没有中标商";
                            }
                            msgType  = "内蒙古政府采购盟市";
                            specType = "政府采购";
                            if (Encoding.Default.GetByteCount(code) > 50)
                            {
                                code = code.GetChina();
                            }
                            if (Encoding.Default.GetByteCount(code) > 50)
                            {
                                code = "";
                            }

                            bidType = ToolHtml.GetInviteTypes(prjName);
                            prjName = ToolDb.GetPrjName(prjName);
                            BidInfo info = ToolDb.GenBidInfo("内蒙古自治区", "内蒙古自治区及盟市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #16
0
        /// <summary>
        /// Crawls the tender listing at <c>SiteUrl</c> and builds one <c>InviteInfo</c> for each listing
        /// row whose detail page can be downloaded and contains at least one table.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> entries have been collected.
        /// </param>
        /// <returns>
        /// The collected <c>InviteInfo</c> entries. The list is empty when the listing page cannot be
        /// downloaded or does not contain the expected table.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new ArrayList();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception)
            {
                // Best effort: without the listing page there is nothing to crawl.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("TABLE"), new HasAttributeFilter("style", "margin: 0")));

            if (nodeList == null || nodeList.Count == 0)
            {
                return list;
            }

            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                return list;
            }

            // Both patterns are loop-invariant, so they are built once instead of once per row.
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regCtx    = new Regex(@"([\r\n]+)|([\t]+)");

            // NOTE(review): the first 6 and last 3 rows are skipped - presumably header/footer
            // rows of the listing table; confirm against the live page.
            for (int j = 6; j < table.RowCount - 3; j++)
            {
                // These values are not available on this site and are passed through empty.
                string prjAddress = string.Empty, buildUnit = string.Empty, endDate = string.Empty,
                       remark = string.Empty, otherType = string.Empty;

                TableRow tr = table.Rows[j];
                if (tr.ColumnCount < 4)
                {
                    // Malformed row: the code (cell 2) and the project name/link (cell 3) are required.
                    continue;
                }

                string code    = tr.Columns[2].ToPlainTextString().Trim();
                string prjName = tr.Columns[3].ToPlainTextString().Trim();

                // A row without a link has no detail page; skip it instead of aborting the whole crawl.
                NodeList links = tr.Columns[3].SearchFor(typeof(ATag), true);
                if (links == null || links.Count == 0)
                {
                    continue;
                }
                ATag aTag = links[0] as ATag;
                if (aTag == null)
                {
                    continue;
                }
                string InfoUrl = "http://www.ymcw.com/" + aTag.Link;

                string HtmlTxt    = string.Empty;
                string htmldetail = string.Empty;
                try
                {
                    // Download the detail page once; both representations below derive from it.
                    string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);

                    // Original-case markup of every table on the page, stored alongside the entry.
                    Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Replace("&nbsp;", "").Trim()));
                    NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    HtmlTxt = dtnodeHTML.AsHtml();

                    // Lower-cased copy with line breaks normalised and scripts/xml islands removed,
                    // used only for plain-text extraction.
                    htmldetail = rawDetail.ToLower().Replace("&nbsp;", "").Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                    htmldetail = regexHtml.Replace(htmldetail, "");
                }
                catch (Exception)
                {
                    // Best effort: a detail page that cannot be fetched or parsed is skipped.
                    continue;
                }

                Parser   dtlparser = new Parser(new Lexer(htmldetail));
                NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (dtnode == null || dtnode.Count == 0)
                {
                    // No table on the detail page means there is no content to extract.
                    continue;
                }

                string inviteCtx = System.Web.HttpUtility.HtmlDecode(dtnode[0].ToPlainTextString().Trim());
                inviteCtx = regCtx.Replace(inviteCtx, "\r\n");

                // The listing's own date column is not read; the crawl date is recorded instead.
                string beginDate  = DateTime.Now.ToString("yyyy-MM-dd");
                string specType   = "其他";
                string msgType    = "深圳市裕明财务咨询有限公司";
                string inviteType = ToolHtml.GetInviteTypes(prjName);

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }

            return list;
        }
Beispiel #17
0
        /// <summary>
        /// For every project code returned by <c>GetPrjCode()</c>, posts a query form to <c>SiteUrl</c>,
        /// walks the result grid (<c>dgConstBid</c>), downloads each row's detail page and saves the
        /// resulting <c>InviteInfo</c> through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation; the <c>MaxCount</c> cap always applies.</param>
        /// <returns>
        /// The list created at the start of the method. Entries are saved directly and never added to
        /// it, so it is always empty.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string html      = string.Empty;
            string cookiestr = string.Empty;
            int    sqlCount  = 0;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
            }
            catch (Exception)
            {
                // Best effort: without the query page there is no view state to post back.
                return list;
            }

            IList arr = GetPrjCode();
            if (arr == null || arr.Count == 0)
            {
                return list;
            }

            // The view state comes from the initial page, which never changes inside the loop.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);

            // Loop-invariant patterns, built once.
            // NOTE(review): "(:|:)" lists the same character twice as written; one alternative
            // presumably targeted the full-width colon - confirm against the original file.
            Regex regDate   = new Regex(@"发布日期(:|:)[^\r\n]+[\r\n]{1}");
            Regex regPrjAdd = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
            Regex regOth    = new Regex(@"(工程类型|项目类型):[^\r\n]+[\r\n]{1}");

            // Walk the codes back to front so that removing the processed entry keeps indices valid.
            for (int d = arr.Count - 1; d >= 0; d--)
            {
                NameValueCollection nvc1 = this.ToolWebSite.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "txtPrj_ID", "txtPrj_Name", "Chk_Query", "Radiobuttonlist1", "QUERY", "ucPageNumControl:gotopage" },
                                                                                   new string[] { string.Empty, string.Empty, viewState, arr[d].ToString(), "", "0", "1", "查询", "" });
                string htmtxt = string.Empty;
                try
                {
                    htmtxt = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), nvc1, Encoding.Default, ref cookiestr);
                }
                catch (Exception)
                {
                    return list;
                }

                Parser   queryParser = new Parser(new Lexer(htmtxt));
                NodeList dtList      = queryParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid")));
                TableTag table       = (dtList != null && dtList.Count > 0) ? dtList[0] as TableTag : null;

                // Row 0 is skipped - presumably the grid header; TODO confirm.
                for (int j = 1; table != null && j < table.RowCount; j++)
                {
                    string remark = string.Empty, beginDate = string.Empty, endDate = string.Empty;

                    TableRow dr = table.Rows[j];
                    if (dr.ColumnCount < 4)
                    {
                        // Malformed row: cells 1-3 (code, name, build unit) are required.
                        continue;
                    }

                    string code      = dr.Columns[1].ToPlainTextString().Trim();
                    string prjName   = dr.Columns[2].ToPlainTextString().Trim();
                    string buildUnit = dr.Columns[3].ToPlainTextString().Trim();

                    // A row without a link has no detail page; skip it instead of aborting the crawl.
                    NodeList links = dr.Columns[1].SearchFor(typeof(ATag), true);
                    if (links == null || links.Count == 0)
                    {
                        continue;
                    }
                    ATag aTag = links[0] as ATag;
                    if (aTag == null)
                    {
                        continue;
                    }

                    // The link is a javascript call "GoDetail('<id>');" - strip the wrapper to get the id.
                    string InfoUrl = "http://61.144.226.2/zbgg/Detail.aspx?ID=" + aTag.Link.Trim().Replace("GoDetail('", "").Replace("');", "") + "&xxlxbh=1&PRJ_TYPE=0";

                    string htmlde = string.Empty;
                    try
                    {
                        htmlde = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("&nbsp;", "");
                    }
                    catch (Exception)
                    {
                        // Best effort: a detail page that cannot be fetched is skipped.
                        continue;
                    }

                    Parser   detailParser = new Parser(new Lexer(htmlde));
                    NodeList dealList     = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table8")));
                    if (dealList == null || dealList.Count == 0)
                    {
                        continue;
                    }

                    string   HtmlTxt = dealList.ToHtml();
                    TableTag tab     = dealList[0] as TableTag;

                    // Two "label:value" renderings of the detail table are built in one pass:
                    //   compact - all spaces removed, used for the address/type lookups;
                    //   full    - spaces kept, stored as the entry content and used for the date lookup.
                    // Rows with fewer than two cells are skipped individually, so a single odd row no
                    // longer discards the text gathered from the rest of the table.
                    StringBuilder compact = new StringBuilder();
                    StringBuilder full    = new StringBuilder();
                    for (int k = 0; tab != null && k < tab.RowCount; k++)
                    {
                        TableRow tr = tab.Rows[k];
                        if (tr.ColumnCount < 2)
                        {
                            continue;
                        }

                        // NOTE(review): the two Replace calls are identical as written; one presumably
                        // targeted the full-width colon - confirm against the original file.
                        string label = tr.Columns[0].ToPlainTextString().Replace(":", "").Replace(":", "");
                        string value = tr.Columns[1].ToPlainTextString().Trim();

                        compact.Append(label.Replace(" ", "")).Append(":").Append(value.Replace(" ", "")).Append("\r\n");
                        full.Append(label).Append(":").Append(value).Append("\r\n");
                    }
                    string ctx       = compact.ToString();
                    string inviteCtx = full.ToString();

                    // The publish-date line has the form "<begin>到<end>"; split it on the separator.
                    string datestr = regDate.Match(inviteCtx).Value.Replace("发布日期", "").Replace(":", "").Replace("\r\n", "").Replace("\r", "").Replace("\n", "");
                    int    sep     = datestr.IndexOf("到");
                    if (sep >= 0)
                    {
                        beginDate = datestr.Substring(0, sep);
                        endDate   = datestr.Substring(sep + 1);
                    }

                    string prjAddress = regPrjAdd.Match(ctx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
                    string otherType  = regOth.Match(ctx).Value.Replace("工程类型:", "").Replace("项目类型:", "").Trim();

                    string msgType    = "深圳市建设工程交易中心";
                    string specType   = "建设工程";
                    string inviteType = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);

                    // Stop once MaxCount entities have been saved. The previous "<=" comparison let
                    // one extra entity through.
                    if (sqlCount >= this.MaxCount)
                    {
                        return list;
                    }
                    ToolDb.SaveEntity(info, this.ExistCompareFields);
                    sqlCount++;
                }

                // This code is done: drop it and hand the remaining codes to DeleteCode
                // (presumably persists the pending set - confirm against its definition).
                arr.RemoveAt(d);
                DeleteCode(arr);
            }

            return list;
        }
Beispiel #18
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();

            //取得页码
            string html = string.Empty;

            try
            {
                DateTime time = ToolHtml.GetDateTimeByLong(1509517250628);
                DateTime dt24 = DateTime.Now.ToUniversalTime();
                string   b    = ToolHtml.GetDateTimeLong(dt24).ToString();
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + b, Encoding.Default);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
            }
            Parser parser  = new Parser(new Lexer(html));
            int    pageInt = 1;


            JavaScriptSerializer serializer = new JavaScriptSerializer();

            object[] objs  = (object[])serializer.DeserializeObject(html);
            object[] items = objs[1] as object[];
            Dictionary <string, object> smsTypeJson = items[0] as Dictionary <string, object>;
            string a    = Convert.ToString(smsTypeJson["page_all"]);
            int    page = int.Parse(a);

            pageInt = page / 18 + 1;
            parser.Reset();
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        string lian = "http://www.nmgp.gov.cn/category/category-ajax.php?type_name=3&byf_page=" + i + "&fun=cggg&_=1509441711785";
                        html = this.ToolWebSite.GetHtmlByUrl(lian, Encoding.UTF8);
                    }
                    catch (Exception ex)
                    {
                        Logger.Error("分页");
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                JavaScriptSerializer        serializer1  = new JavaScriptSerializer();
                object[]                    objd         = (object[])serializer.DeserializeObject(html);
                object[]                    items1       = objd[0] as object[];
                Dictionary <string, object> smsTypeJson1 = items1[0] as Dictionary <string, object>;
                foreach (KeyValuePair <string, object> obj in smsTypeJson)
                {
                    object[] array = objd[0] as object[];
                    foreach (object arrValue in array)
                    {
                        string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                                    code = string.Empty, bidDate = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                                    bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                                    bidCtx = string.Empty, prjAddress = string.Empty, remark = string.Empty, prjMgr = string.Empty,
                                    otherType = string.Empty, HtmlTxt = string.Empty, strHtml = string.Empty;
                        Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                        endDate = Convert.ToString(dic["ENDDATE"]).GetDateRegex("yyyy-MM-dd");
                        prjName = Convert.ToString(dic["TITLE"]);
                        string xu = Convert.ToString(dic["wp_mark_id"]);
                        InfoUrl = "http://www.nmgp.gov.cn/ay_post/post.php?tb_id=3&p_id=" + xu;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8).Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            continue;
                        }
                        Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                        htmldtl = regexHtml.Replace(htmldtl, "");
                        Parser   parserdtl     = new Parser(new Lexer(htmldtl));
                        Parser   dtlparserHTML = new Parser(new Lexer(htmldtl));
                        NodeList nodesDtl      = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "center")));
                        if (nodesDtl != null && nodesDtl.Count > 0)
                        {
                            Parser   begDate = new Parser(new Lexer(nodesDtl.ToHtml()));
                            NodeList begNode = begDate.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "center")));
                            if (begNode != null && begNode.Count > 0)
                            {
                                beginDate = begNode.AsString().GetDateRegex("yyyy年MM月dd日");
                            }
                            begDate.Reset();
                            NodeList dtlTable = begDate.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("border", "1")));
                            if (dtlTable != null && dtlTable.Count > 0)
                            {
                                TableTag tableDtl = dtlTable[0] as TableTag;
                                if (tableDtl.RowCount > 2)
                                {
                                    string ctx = tableDtl.Rows[2].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 4)
                                {
                                    string ctx = tableDtl.Rows[4].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 6)
                                {
                                    string ctx = tableDtl.Rows[6].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 8)
                                {
                                    string ctx = tableDtl.Rows[8].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 10)
                                {
                                    string ctx = tableDtl.Rows[10].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                                if (bidMoney == "0" && tableDtl.RowCount > 12)
                                {
                                    string ctx = tableDtl.Rows[12].ToPlainTextString();
                                    bidUnit  = ctx.GetRegexBegEnd("供应商:", ";");
                                    bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                                }
                            }
                            HtmlTxt = nodesDtl.ToHtml();
                            bidCtx  = HtmlTxt.ToCtxString();

                            code      = bidCtx.GetRegex("批准文件编号,工程编号,项目编号").Replace("无", "");
                            code      = bidCtx.GetRegexBegEnd("批准文件编号:", "二");
                            buildUnit = bidCtx.GetBuildRegex();
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = bidCtx.GetRegexBegEnd("代理机构名称:", "地址");
                            }
                            prjAddress = bidCtx.GetAddressRegex();
                            if (string.IsNullOrEmpty(prjAddress))
                            {
                                prjAddress = bidCtx.GetRegexBegEnd("地址:", "邮政编码");
                            }


                            msgType  = "内蒙古自治区政府采购中心";
                            specType = "政府采购";
                            bidType  = ToolHtml.GetInviteTypes(prjName);
                            prjName  = ToolDb.GetPrjName(prjName);
                            BidInfo info = ToolDb.GenBidInfo("内蒙古自治区", "内蒙古自治区及盟市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }


                //for (int i = 1; i <= pageInt; i++)
                //{
                //    if (i > 1)
                //    {
                //        try
                //        {
                //            html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&pos=" + i.ToString(), Encoding.Default);
                //        }
                //        catch (Exception ex)
                //        {
                //            Logger.Error(ex.ToString());
                //        }
                //    }
                //    parser = new Parser(new Lexer(html));
                //    NodeList nodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "recordlist")));
                //    if (nodes != null && nodes.Count > 0)
                //    {
                //        TableTag table = nodes[0] as TableTag;
                //        for (int t = 0; t < table.RowCount; t++)
                //        {
                //            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                //                code = string.Empty, bidDate = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                //                bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                //                bidCtx = string.Empty, prjAddress = string.Empty, remark = string.Empty, prjMgr = string.Empty,
                //                otherType = string.Empty, HtmlTxt = string.Empty,strHtml=string.Empty;
                //            TableRow tr = table.Rows[t];
                //            endDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                //            ATag alink = tr.Columns[0].GetATag();
                //            prjName = tr.Columns[0].GetATagValue("title");
                //            InfoUrl = "http://www.nmgp.gov.cn" + alink.Link;
                //            string htmldtl = string.Empty;
                //            try
                //            {
                //                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "").Trim();
                //            }
                //            catch (Exception ex)
                //            {
                //                continue;
                //            }

                //            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                //            htmldtl = regexHtml.Replace(htmldtl, "");
                //            Parser parserdtl = new Parser(new Lexer(htmldtl));
                //            Parser dtlparserHTML = new Parser(new Lexer(htmldtl));
                //            NodeList nodesDtl = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "hlcms_9")));
                //            if (nodesDtl != null && nodesDtl.Count > 0)
                //            {
                //                Parser begDate = new Parser(new Lexer(nodesDtl.ToHtml()));
                //                NodeList begNode = begDate.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "yzhang")));
                //                if (begNode != null && begNode.Count > 0)
                //                {
                //                    beginDate = begNode.AsString().GetDateRegex("yyyy年MM月dd日");
                //                }
                //                begDate.Reset();
                //                NodeList dtlTable = begDate.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "5")));
                //                if (dtlTable != null && dtlTable.Count > 0)
                //                {
                //                    TableTag tableDtl = dtlTable[0] as TableTag;
                //                    if (tableDtl.RowCount > 2)
                //                    {
                //                        string ctx = tableDtl.Rows[2].ToPlainTextString();
                //                        bidUnit = ctx.GetRegexBegEnd("供应商:",";");
                //                        bidMoney = ctx.GetRegexBegEnd("中标金额:","。").GetMoney();
                //                    }
                //                    if (bidMoney == "0"&& tableDtl.RowCount >4)
                //                    {
                //                        string ctx = tableDtl.Rows[4].ToPlainTextString();
                //                        bidUnit = ctx.GetRegexBegEnd("供应商:", ";");
                //                        bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                //                    }
                //                    if (bidMoney == "0" && tableDtl.RowCount > 6)
                //                    {
                //                        string ctx = tableDtl.Rows[6].ToPlainTextString();
                //                        bidUnit = ctx.GetRegexBegEnd("供应商:", ";");
                //                        bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                //                    }
                //                    if (bidMoney == "0" && tableDtl.RowCount > 8)
                //                    {
                //                        string ctx = tableDtl.Rows[8].ToPlainTextString();
                //                        bidUnit = ctx.GetRegexBegEnd("供应商:", ";");
                //                        bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                //                    }
                //                    if (bidMoney == "0" && tableDtl.RowCount > 10)
                //                    {
                //                        string ctx = tableDtl.Rows[10].ToPlainTextString();
                //                        bidUnit = ctx.GetRegexBegEnd("供应商:", ";");
                //                        bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                //                    }
                //                    if (bidMoney == "0" && tableDtl.RowCount > 12)
                //                    {
                //                        string ctx = tableDtl.Rows[12].ToPlainTextString();
                //                        bidUnit = ctx.GetRegexBegEnd("供应商:", ";");
                //                        bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                //                    }
                //                }
                //                HtmlTxt = nodesDtl.ToHtml();
                //                bidCtx = HtmlTxt.ToCtxString();

                //                code = bidCtx.GetRegex("批准文件编号,工程编号,项目编号",true,50).Replace("无", "");
                //                buildUnit = bidCtx.GetBuildRegex();
                //                if (string.IsNullOrEmpty(buildUnit))
                //                    buildUnit = bidCtx.GetRegex("采购代理机构名称,采购单位名称");
                //                prjAddress = bidCtx.GetAddressRegex();
                //                if (string.IsNullOrEmpty(prjAddress))
                //                    prjAddress = bidCtx.GetRegex("投标地点,开标地点,地址");


                //                msgType = "内蒙古自治区政府采购中心";
                //                specType = "政府采购";
                //                bidType = ToolHtml.GetInviteTypes(prjName);
                //                prjName = ToolDb.GetPrjName(prjName);
                //                BidInfo info = ToolDb.GenBidInfo("内蒙古自治区", "内蒙古自治区及盟市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                //                list.Add(info);
                //                if (!crawlAll && list.Count >= this.MaxCount)
                //                    return list;
                //            }
                //        }
                //    }
            }
            return(list);
        }
Beispiel #19
0
        /// <summary>
        /// Walks the rows of the <c>table.lefttable</c> grid found in <paramref name="html"/>,
        /// downloads the detail page linked from each row, builds an <c>InviteInfo</c> from it
        /// and appends it to <paramref name="list"/>. Attachment links found on the detail page
        /// are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="list">Collection that receives one <c>InviteInfo</c> per processed row.</param>
        /// <param name="html">HTML of a listing page.</param>
        /// <param name="crawlAll">
        /// When false, the method returns as soon as <paramref name="list"/> contains at least
        /// <c>MaxCount</c> entries.
        /// </param>
        public void DealHtml(IList list, string html, bool crawlAll)
        {
            Parser   listParser = new Parser(new Lexer(html));
            NodeList gridNodes  = listParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));

            if (gridNodes == null || gridNodes.Count == 0)
            {
                return;
            }

            TableTag table = gridNodes[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // The patterns are identical for every row, so they are built once instead of per iteration.
            Regex regexHtml      = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regexStartTime = new Regex(@"时间:\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}");
            Regex regexCode      = new Regex("(工程编号|项目编号|招标编号):[^\r\n]+[\r\n]{1}");
            Regex regexBuildUnit = new Regex("(招标人|建设单位|招标采购单位):[^\r\n]+[\r\n]{1}");
            Regex regexAddress   = new Regex("(建设地点|项目地点|工程地点):[^\r\n]+[\r\n]{1}");
            Regex regexComment   = new Regex("<!--[^<]+-->");
            Regex regexDate      = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");

            // The first and the last row are not data rows (presumably header and pager — TODO confirm).
            for (int t = 1; t < table.RowCount - 1; t++)
            {
                string code = string.Empty, buildUnit = string.Empty, prjAddress = string.Empty,
                       beginDate = string.Empty, remark = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty;

                TableRow tr = table.Rows[t] as TableRow;
                if (tr == null)
                {
                    continue;
                }

                // A row without a link cannot be followed; skip it instead of failing on [0].
                NodeList rowLinks = tr.SearchFor(typeof(ATag), true);
                ATag     aTag     = (rowLinks != null && rowLinks.Count > 0) ? rowLinks[0] as ATag : null;
                if (aTag == null)
                {
                    continue;
                }

                string InfoUrl = aTag.Link;
                string prjName = CleanCellText(tr.Columns[1].ToPlainTextString());
                string endDate = CleanCellText(tr.Columns[2].ToPlainTextString());

                string htmlDtl;
                try
                {
                    htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                }
                catch (Exception)
                {
                    // Best effort: a detail page that cannot be downloaded only costs this one row.
                    continue;
                }
                htmlDtl = regexHtml.Replace(htmlDtl ?? string.Empty, "");

                Parser   parserCtx = new Parser(new Lexer(htmlDtl));
                NodeList ctxNode   = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "printTb lefttable")));
                if (ctxNode != null && ctxNode.Count > 0)
                {
                    Parser   parserdiv   = new Parser(new Lexer(htmlDtl));
                    NodeList buttonNodes = parserdiv.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "biuuu_button")));
                    string   buttonHtml  = buttonNodes != null ? buttonNodes.AsHtml() : string.Empty;

                    HtmlTxt = ctxNode.AsHtml();
                    // string.Replace throws ArgumentException for an empty search string,
                    // which is what AsHtml() yields when the button div is missing.
                    if (!string.IsNullOrEmpty(buttonHtml))
                    {
                        HtmlTxt = HtmlTxt.Replace(buttonHtml, "");
                    }
                    HtmlTxt = HtmlTxt.Trim();

                    TableTag tabTag = ctxNode[0] as TableTag;
                    if (tabTag != null)
                    {
                        if (tabTag.RowCount > 1)
                        {
                            string startTime = CleanCellText(tabTag.Rows[1].Columns[0].ToPlainTextString());
                            beginDate = CleanCellText(regexStartTime.Match(startTime).Value.Replace("时间:", ""));
                        }

                        string plainText = tabTag.ToPlainTextString();

                        // For a failed match Value is "", IndexOf is -1 and Substring(0) yields "".
                        string codeLine = regexCode.Match(plainText).Value;
                        code = CleanCellText(codeLine.Substring(codeLine.IndexOf(":") + 1));

                        string buildUnitLine = regexBuildUnit.Match(plainText).Value;
                        buildUnit = CleanCellText(buildUnitLine.Substring(buildUnitLine.IndexOf(":") + 1));

                        string addressLine = regexAddress.Match(plainText).Value;
                        prjAddress = addressLine.Substring(addressLine.IndexOf(":") + 1).Trim();

                        if (tabTag.RowCount > 2)
                        {
                            ctx = tabTag.Rows[2].Columns[0].ToPlainTextString().Replace("&nbsp;", " ").Replace("\r\n\r\n\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                            if (ctx.Length > 0)
                            {
                                ctx = regexComment.Replace(ctx, "");
                            }
                        }
                    }

                    // Size limits are expressed in bytes of the default encoding.
                    if (Encoding.Default.GetByteCount(code) > 50)
                    {
                        code = "";
                    }
                    // Truncate by bytes: the former Substring(0, 150) threw for strings that
                    // exceed 150 bytes while being shorter than 150 characters.
                    buildUnit = TruncateToByteLimit(buildUnit, Encoding.Default, 150);
                    if (Encoding.Default.GetByteCount(prjAddress) > 200)
                    {
                        prjAddress = "见招标公告内容";
                    }

                    if (beginDate.Length > 0 && endDate.Length > 0)
                    {
                        // A value that fails to parse stays at DateTime.MinValue, exactly as it
                        // did when parse exceptions were swallowed.
                        DateTime begin;
                        DateTime end;
                        if (DateTime.TryParse(beginDate, out begin))
                        {
                            DateTime.TryParse(endDate, out end);
                        }
                        else
                        {
                            end = new DateTime();
                        }
                        if (begin > end)
                        {
                            endDate = string.Empty;
                        }
                    }
                }

                parserCtx.Reset();

                NodeList dateNodes  = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "toptd_bai")));
                string   listedDate = dateNodes != null ? regexDate.Match(dateNodes.AsString()).Value.Trim() : string.Empty;
                // The date from the "toptd_bai" cell wins when present; otherwise the value
                // parsed from the detail table is kept instead of being replaced by "".
                if (listedDate.Length > 0)
                {
                    beginDate = listedDate;
                }

                string     inviteType = ToolHtml.GetInviteTypes(prjName);
                InviteInfo info       = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, "惠州市建设工程交易中心", inviteType, "建设工程", string.Empty, InfoUrl, HtmlTxt);
                list.Add(info);

                // Rewind before scanning again, as is done before the date lookup above.
                // NOTE(review): without Reset() the parser presumably resumes at the end of the
                // page and finds no links — confirm against the parser library.
                parserCtx.Reset();
                NodeList linkNodes = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("target", "_blank")));
                NodeList aTagNodes = linkNodes != null ? linkNodes.SearchFor(typeof(ATag), true) : null;
                if (aTagNodes != null)
                {
                    for (int a = 0; a < aTagNodes.Count; a++)
                    {
                        ATag fileTag = aTagNodes[a] as ATag;
                        if (fileTag == null || fileTag.Link == null)
                        {
                            continue;
                        }
                        if (fileTag.Link.Contains("http://www.ebc.huizhou.gov.cn/index/loadNewsFile"))
                        {
                            BaseAttach attach = ToolDb.GenBaseAttach(fileTag.ToPlainTextString(), info.Id, fileTag.Link);
                            base.AttachList.Add(attach);
                        }
                    }
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return;
                }
            }
        }

        /// <summary>
        /// Removes line breaks and tabs, turns <c>&amp;nbsp;</c> entities into spaces and trims the result.
        /// </summary>
        private static string CleanCellText(string text)
        {
            if (text == null)
            {
                return string.Empty;
            }
            return text.Replace("\r\n", "").Replace("\t", "").Replace("&nbsp;", " ").Trim();
        }

        /// <summary>
        /// Returns the longest prefix of <paramref name="value"/> whose encoded size does not
        /// exceed <paramref name="maxBytes"/>; the value is returned unchanged when it already fits.
        /// </summary>
        private static string TruncateToByteLimit(string value, Encoding encoding, int maxBytes)
        {
            if (string.IsNullOrEmpty(value) || encoding.GetByteCount(value) <= maxBytes)
            {
                return value;
            }

            // Every character needs at least one byte, so maxBytes is an upper bound for the length.
            int length = Math.Min(value.Length, maxBytes);
            while (length > 0 && encoding.GetByteCount(value.Substring(0, length)) > maxBytes)
            {
                length--;
            }
            return value.Substring(0, length);
        }
Beispiel #20
0
        /// <summary>
        /// Crawls the notice lists of every area in <c>DicSiteUrl</c>. For each list entry the
        /// detail page is downloaded, an <c>InviteInfo</c> is built and added to the result, and
        /// attachment links inside the notice body are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling of an area stops once <c>MaxCount</c> entries have been collected
        /// for that area; the next area is then processed.
        /// </param>
        /// <returns>The collected <c>InviteInfo</c> objects of all areas.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <InviteInfo>();

            // Built once: the patterns do not depend on the area, page or item.
            Regex regexCtx     = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regPrjAdd    = new Regex(@"(工程地点|工程地址|项目地址)[:|:][^\r\n]+[\r\n]{1}");
            Regex regbuildUnit = new Regex(@"(招标单位|招标人):[^\r\n]+[\r\n]{1}");

            foreach (string area in this.DicSiteUrl.Keys)
            {
                string siteUrl = this.DicSiteUrl[area];
                int    pageInt = 1, count = 0;
                string html;
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(siteUrl, Encoding.UTF8);
                }
                catch (Exception)
                {
                    // One unreachable area must not prevent the remaining areas from being crawled.
                    continue;
                }

                Parser   parser     = new Parser(new Lexer(html));
                NodeList pagerNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "page")));
                if (pagerNodes != null && pagerNodes.Count > 0)
                {
                    // The page count is the text between "createPageHTML(" and the first comma;
                    // when it cannot be read the crawl falls back to a single page.
                    string pagerText = pagerNodes.AsString().ToNodeString();
                    if (!string.IsNullOrEmpty(pagerText))
                    {
                        pagerText = pagerText.Replace("createPageHTML(", "");
                        int commaAt = pagerText.IndexOf(',');
                        int parsedPages;
                        if (commaAt >= 0 && int.TryParse(pagerText.Remove(commaAt), out parsedPages))
                        {
                            pageInt = parsedPages;
                        }
                    }
                }

                bool limitReached = false;
                for (int i = 1; i <= pageInt && !limitReached; i++)
                {
                    if (i > 1)
                    {
                        try
                        {
                            // Page 2 is "index_1.html", page 3 is "index_2.html", and so on.
                            html = this.ToolWebSite.GetHtmlByUrl(siteUrl + "index_" + (i - 1) + ".html", Encoding.UTF8);
                        }
                        catch { continue; }
                    }

                    parser = new Parser(new Lexer(html));
                    NodeList items = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "secondrightlistbox"))), new TagNameFilter("ul"))), new TagNameFilter("li")));
                    if (items == null || items.Count == 0)
                    {
                        continue;
                    }

                    for (int t = 0; t < items.Count; t++)
                    {
                        string code = string.Empty, endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                        string beginDate = items[t].ToNodePlainString().GetDateRegex();
                        string prjName   = items[t].GetATagValue("title");
                        string InfoUrl   = siteUrl + items[t].GetATagHref().Replace("./", "");

                        string HtmlTxt;
                        string htmldetail;
                        try
                        {
                            // Download the detail page once and derive both representations from it.
                            string   rawDetail     = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");
                            Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                            NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "contentrightlistbox2")));
                            HtmlTxt    = dtnodeHTML.AsHtml();
                            // Line-break tags become real line breaks so the regexes below can match per line.
                            htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n").Replace("<br/>", "\r\n");
                        }
                        catch (Exception) { continue; }

                        Parser   dtlparser = new Parser(new Lexer(htmldetail));
                        NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "contentrightlistbox2")));

                        string inviteCtx  = regexCtx.Replace(dtnode.AsString(), "").Replace(" ", "");
                        string prjAddress = regPrjAdd.Match(inviteCtx).Value.Replace("工程地点", "").Replace("工程地址", "").Replace("项目地址", "").Replace(":", "").Replace(":", "").Replace(")", "").Trim();

                        string buildUnit = regbuildUnit.Match(inviteCtx).Value.Replace("招标单位:", "").Replace("招标人:", "").Trim();
                        if (buildUnit.Contains("招标代理机构"))
                        {
                            // Cut off the agency part that follows the tenderer on the same line.
                            buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理机构"));
                        }

                        string msgType    = "佛山市建设工程交易中心";
                        string specType   = "建设工程";
                        string inviteType = ToolHtml.GetInviteTypes(prjName);
                        // The "市直" key is stored with an empty area name.
                        string     are  = area != "市直" ? area : "";
                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "佛山市区", are, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        count++;
                        list.Add(info);

                        Parser   attachParser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode        = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null && aNode.Count > 0)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k] as ATag;
                                if (a == null || !a.IsAtagAttach())
                                {
                                    continue;
                                }

                                string link;
                                if (a.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0)
                                {
                                    link = a.Link;
                                }
                                else
                                {
                                    link = "http://www.fsggzy.cn/" + a.Link.GetReplace("../,./");
                                }
                                // Links longer than 500 bytes are not recorded.
                                if (Encoding.Default.GetByteCount(link) > 500)
                                {
                                    continue;
                                }
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }

                        if (!crawlAll && count >= this.MaxCount)
                        {
                            // Leave the item loop; the page loop ends through its condition and
                            // the crawl continues with the next area.
                            limitReached = true;
                            break;
                        }
                    }
                }
            }
            return(list);
        }
Beispiel #21
0
        /// <summary>
        /// Crawls the paged result grid <c>ctl00_Content_GridView1</c> of <c>SiteUrl</c>. Pages after
        /// the first are requested by posting the ASP.NET paging event back to the site. Every
        /// grid row is turned into a <c>BidInfo</c>; attachment links found in the <c>trFujian</c>
        /// row of the detail page are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once the result holds <c>MaxCount</c> entries.</param>
        /// <returns>The collected <c>BidInfo</c> objects; empty when the first page cannot be loaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();
            // Number of result pages; stays 1 when it cannot be read from the page.
            int    pageInt = 1;
            string html;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser    = new Parser(new Lexer(html));
            NodeList sNode     = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));
            Match    pageMatch = new Regex(@",共[^页]+页,").Match(sNode.AsString());

            int parsedPages;
            if (int.TryParse(pageMatch.Value.Replace(",共", "").Replace("页,", "").Trim(), out parsedPages))
            {
                pageInt = parsedPages;
            }

            // Built once: the patterns are the same for every row.
            Regex regPrjAdd  = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
            Regex regprjMgr  = new Regex(@"(项目经理|项目负责人|项目总监|建造师|监理师|项目经理姓名)(:|:)[^\s]+[\s]{1}");
            Regex regInvType = new Regex(@"[^\r\n]+[\r\n]{1}");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // The postback needs the hidden state fields of the most recently loaded page.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "ctl00$hdnPageCount" }, new string[] { "ctl00$Content$GridView1", "Page$" + i.ToString(), viewState, "", eventValidation, pageInt.ToString() });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row are not data rows (presumably header and pager — TODO confirm).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j] as TableRow;
                    if (tr == null)
                    {
                        continue;
                    }

                    string code      = tr.Columns[1].ToPlainTextString().Trim();
                    string prjName   = tr.Columns[2].ToPlainTextString().Trim();
                    string buildUnit = tr.Columns[4].ToPlainTextString().Trim();
                    string bidUnit   = tr.Columns[5].ToPlainTextString().Trim();
                    string bidMoney  = tr.Columns[6].ToPlainTextString().Replace("万元", "").Trim();

                    // Column 3 holds "<begin>至<end>"; a cell without the separator yields no end date
                    // instead of an IndexOutOfRangeException.
                    string[] period    = tr.Columns[3].ToPlainTextString().Split('至');
                    string   beginDate = period[0].Replace("年", "-").Replace("月", "-").Replace("日", " ").Replace("时", "").Trim();
                    string   endDate   = period.Length > 1
                        ? period[1].Replace("年", "-").Replace("月", "-").Replace("日", " ").Replace("时", "").Trim()
                        : string.Empty;

                    // The detail link is expected as the first child of the name cell; rows
                    // without it cannot be followed and are skipped.
                    NodeList nameCellChildren = tr.Columns[2].Children;
                    ATag     aTag             = (nameCellChildren != null && nameCellChildren.Count > 0) ? nameCellChildren[0] as ATag : null;
                    if (aTag == null)
                    {
                        continue;
                    }
                    string InfoUrl = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;

                    string HtmlTxt;
                    string htmldetail;
                    try
                    {
                        // Download the detail page once and derive both representations from it.
                        string   rawDetail     = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");
                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        // Line-break tags become real line breaks so the regexes below can match per line.
                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                    }
                    catch (Exception) { continue; }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));

                    string bidCtx     = dtnode.AsString().Replace(" ", "");
                    string prjAddress = regPrjAdd.Match(bidCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
                    string msgType    = "深圳市建设工程交易中心";
                    string specType   = "建设工程";
                    string prjMgr     = regprjMgr.Match(bidCtx).Value.Replace("项目经理姓名", "").Replace("项目经理", "").Replace("项目总监", "").Replace("建造师", "").Replace("项目负责人", "").Replace(":", "").Replace(":", "").Replace("监理师", "").Trim();

                    // A winner found in the notice text takes precedence over the grid column.
                    string bidUnitInfo = bidCtx.GetBidRegex();
                    if (!string.IsNullOrEmpty(bidUnitInfo))
                    {
                        bidUnit = bidUnitInfo;
                    }

                    // The first line of the notice text is used to classify the bid type.
                    string InvType = regInvType.Match(bidCtx).Value;

                    prjName = ToolDb.GetPrjName(prjName);
                    if (!string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = ToolDb.GetBidUnit(bidUnit);
                        if (bidUnit.Contains("报价"))
                        {
                            bidUnit = bidUnit.Remove(bidUnit.IndexOf("报价"));
                        }
                    }

                    string  bidType = ToolHtml.GetInviteTypes(InvType);
                    BidInfo info    = ToolDb.GenBidInfo("广东省", "深圳市工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, string.Empty, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Rewind the parser before scanning the same page for the attachment row.
                    dtlparser.Reset();
                    NodeList dlNodes = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "trFujian"), new TagNameFilter("tr")));
                    if (dlNodes != null && dlNodes.Count > 0)
                    {
                        TableRow attr      = dlNodes[0] as TableRow;
                        NodeList fileNodes = attr != null ? attr.SearchFor(typeof(ATag), true) : null;
                        if (fileNodes != null && fileNodes.Count > 0)
                        {
                            for (int f = 0; f < fileNodes.Count; f++)
                            {
                                ATag fileTag = fileNodes[f] as ATag;
                                if (fileTag != null && !string.IsNullOrEmpty(fileTag.Link))
                                {
                                    // Relative ".." links are rewritten to the site root.
                                    BaseAttach attach = ToolDb.GenBaseAttach(fileTag.StringText, info.Id, fileTag.Link.Replace("..", "http://www.szjsjy.com.cn"));
                                    base.AttachList.Add(attach);
                                }
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #22
0
        /// <summary>
        /// Crawls the first ten list pages of the Shenzhen Gas tender site. Every list entry is
        /// followed to its detail page; when that page contains a <c>div.ninfo-con</c> block an
        /// <c>InviteInfo</c> is built from it and added to the result.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once the result holds <c>MaxCount</c> entries.</param>
        /// <returns>The collected <c>InviteInfo</c> objects; empty when the first page cannot be loaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <InviteInfo>();
            string htl;
            // The page count is fixed; it is not read from the site.
            int    page = 10;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl);
            }
            catch
            {
                return(list);
            }

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl("http://www.szgas.com.cn/node_200865_" + i + ".htm");
                    }
                    catch
                    {
                        continue;
                    }
                }

                Parser   parser        = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news")), true), new TagNameFilter("li")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < tableNodeList.Count; j++)
                {
                    string endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

                    // List items without a usable link cannot be followed; skip them.
                    ATag aTag = tableNodeList[j].GetATag();
                    if (aTag == null || aTag.Link == null)
                    {
                        continue;
                    }

                    string prjName   = aTag.LinkText.Trim();
                    string beginDate = tableNodeList[j].ToPlainTextString().GetDateRegex();

                    string InfoUrl;
                    if (aTag.Link.Contains("http"))
                    {
                        InfoUrl = aTag.Link.GetReplace("&#38;", "&");
                    }
                    else
                    {
                        InfoUrl = "http://www.szgas.com.cn/" + aTag.Link.Trim().GetReplace("&#38;", "&");
                    }

                    // Links that carry a query string are redirected to the sztc.com display
                    // page, reusing the original query string.
                    string[] urls = InfoUrl.Split('?');
                    if (urls.Length > 1)
                    {
                        InfoUrl = "http://www.sztc.com/tender/InfoPubDisplay.aspx?" + urls[1];
                    }

                    string htmldetail;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ninfo-con")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt   = dtnode.AsHtml();
                    string inviteCtx = HtmlTxt.Replace("</span>", "\r\n").ToCtxString();

                    // Try the generic extractor first, then two site-specific phrasings.
                    string code = inviteCtx.GetCodeRegex();
                    if (string.IsNullOrWhiteSpace(code))
                    {
                        code = inviteCtx.GetRegexBegEnd("招标编号:", "进行公开招标");
                    }
                    if (string.IsNullOrWhiteSpace(code))
                    {
                        code = inviteCtx.GetRegexBegEnd("公开招标", ",欢迎");
                    }

                    string msgType  = "深圳燃气集团公司";
                    string specType = "建设工程";
                    // NOTE(review): the address was previously extracted from the notice text and
                    // then unconditionally replaced by this placeholder; only the placeholder was
                    // ever stored, so the unused extraction has been dropped.
                    string prjAddress = "见中标信息";
                    string buildUnit  = "深圳燃气集团公司";
                    string inviteType = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "",
                                                           string.Empty, code, prjName, prjAddress, buildUnit,
                                                           beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #23
0
        /// <summary>
        /// Crawls the tender-notice list pages under http://zbcg.sziit.edu.cn/zbxx/ and turns every
        /// notice whose detail page contains a div with id "vsb_content" into an InviteInfo record.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl stops as soon as MaxCount records have been collected.</param>
        /// <returns>The collected records; an empty list when the first list page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <InviteInfo>();
            string htl  = string.Empty;
            int    page = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "headStyle27kkt9g3gy")));

            if (nodeList != null && nodeList.Count > 0)
            {
                try
                {
                    // The pager text holds the page count between "/" and "首".
                    string temp = nodeList[0].ToPlainTextString().GetRegexBegEnd("/", "首").ToLower().Replace("&nbsp;", "");
                    page = int.Parse(temp);
                }
                catch
                {
                    // Best effort: when the pager text cannot be parsed, only the first page is crawled (page stays 1).
                }
            }
            else
            {
                // No pager table found: fall back to a fixed page count.
                page = 25;
            }

            // The patterns do not depend on the row, so they are built once.
            Regex regexImg  = new Regex(@"<IMG[^>]*>"); // strips images from the stored HTML
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regBegin  = new Regex(@"投标截止时间:[^\r\n]+[\r\n]{1}");
            Regex regDate   = new Regex(@"\d{4}年\d{1,2}月\d{1,2}日");

            // The page already downloaded from SiteUrl is processed as page number "page";
            // the remaining pages are fetched by number, counting down to 1.
            for (int i = page; i >= 1; i--)
            {
                if (i < page)
                {
                    try
                    {
                        string url = "http://zbcg.sziit.edu.cn/zbxx/" + i + ".htm";
                        htl = this.ToolWebSite.GetHtmlByUrl(url, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "winstyle66953")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = tableNodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The last row of the table is deliberately not visited (same bound as before).
                for (int j = 0; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];

                    // Columns 1 (title/link) and 2 (date) are both read below, so three columns are required.
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }

                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                           otherType = string.Empty, HtmlTxt = string.Empty;

                    prjName = tr.Columns[1].ToNodePlainString();
                    if (prjName.Contains("暂停公告"))
                    {
                        continue;
                    }
                    beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex("yyyy/MM/dd");
                    InfoUrl   = "http://zbcg.sziit.edu.cn/" + tr.Columns[1].GetATagHref().Replace("../", "").Replace("./", "");

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "vsb_content")));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt   = regexImg.Replace(dtnode.AsHtml(), "");
                    inviteCtx = dtnode.AsString().Replace("&nbsp;", "").Trim();
                    inviteCtx = regexHtml.Replace(inviteCtx, "").Replace(" ", "").Replace("&ldquo;", "").Replace("&rdquo;", "").Trim();

                    code       = inviteCtx.GetCodeRegex().GetCodeDel();
                    prjAddress = inviteCtx.GetAddressRegex();
                    buildUnit  = inviteCtx.GetBuildRegex();

                    // Bid deadline: take the labelled line, then keep only the "yyyy年M月d日" part.
                    string deadlineLine = regBegin.Match(inviteCtx).Value.Replace("投标截止时间:", "").Replace(" ", "").Trim();
                    endDate = regDate.Match(deadlineLine).Value.Trim();

                    // The invite type must be resolved BEFORE it is used to derive specType;
                    // previously it was still empty at this point, so specType was always "其他".
                    inviteType = ToolHtml.GetInviteTypes(prjName);
                    if (inviteType == "设备材料" || inviteType == "小型施工" || inviteType == "专业分包" || inviteType == "劳务分包" || inviteType == "服务" || inviteType == "勘察" || inviteType == "设计" || inviteType == "监理" || inviteType == "施工")
                    {
                        specType = "建设工程";
                    }
                    else
                    {
                        specType = "其他";
                    }
                    msgType = "深圳信息职业技术学院";

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "",
                                                           string.Empty, code, prjName, prjAddress, buildUnit,
                                                           beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #24
0
        /// <summary>
        /// Crawls the notice list under http://www.shcac.edu.cn:80/html/xxdt/tzgg/ and, depending on the
        /// title found on each detail page, produces either an InviteInfo (tender / supplementary notice)
        /// or a BidInfo (award result) record. Attachment links found in the detail body are appended to
        /// base.AttachList.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl stops as soon as MaxCount records have been collected.</param>
        /// <returns>The collected records; an empty list when the first list page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new ArrayList();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "ecms_pagination")), true), new TagNameFilter("a")));

            // The second-to-last pager link carries the last page number; anything unparsable leaves pageInt at 1.
            if (pageList != null && pageList.Count >= 2)
            {
                ATag lastPageLink = pageList[pageList.Count - 2] as ATag;
                int  parsedPage;
                if (lastPageLink != null && int.TryParse(lastPageLink.LinkText, out parsedPage))
                {
                    pageInt = parsedPage;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.shcac.edu.cn:80/html/xxdt/tzgg/" + i.ToString() + ".html", Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_main_content")), true), new TagNameFilter("ul")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    ATag aTag = nodeList[j].GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link has no detail page to crawl.
                        continue;
                    }

                    // Only the date is taken from the list row; the title is read from the detail page.
                    string btTime = nodeList[j].ToNodePlainString().GetDateRegex();
                    string btUrl  = aTag.Link;

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(btUrl, Encoding.UTF8);
                        htldtl = htldtl.GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    Parser   dtlParser = new Parser(new Lexer(htldtl));
                    NodeList dtlBt     = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail_main_content")), true), new TagNameFilter("h3")));
                    if (dtlBt == null || dtlBt.Count == 0)
                    {
                        continue;
                    }

                    string btName   = dtlBt.AsString();
                    bool   isInvite = btName.Contains("招标公告") || btName.Contains("补充公告");
                    bool   isBid    = !isInvite && (btName.Contains("中标结果") || btName.Contains("结果公示") || btName.Contains("中标公示"));
                    if (!isInvite && !isBid)
                    {
                        continue;
                    }

                    // Both record kinds read their body from the same div.
                    dtlParser.Reset();
                    NodeList dtlList = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("style", "line-height:22px;")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    string HtmlTxt = dtlList.ToHtml();
                    string ctx     = HtmlTxt.Replace("</p>", "\r\n").ToCtxString().Replace("\r\n\t", "\r\n").Replace("\r\n\r\n", "\r\n");
                    string msgType = "上海民航职业技术学院";

                    if (isInvite)
                    {
                        string code = string.Empty, endDate = string.Empty, remark = string.Empty,
                               inviteType = string.Empty, specType = string.Empty, otherType = string.Empty;
                        string buildUnit  = ctx.GetBuildRegex();
                        string prjAddress = ctx.GetAddressRegex();

                        InviteInfo info = ToolDb.GenInviteInfo("上海市", "上海市区", string.Empty, string.Empty, code, btName, prjAddress, buildUnit, btTime, endDate, ctx, remark, msgType, inviteType, specType, otherType, btUrl, HtmlTxt);
                        list.Add(info);

                        Parser   attachParser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNodes       = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNodes != null)
                        {
                            for (int a = 0; a < aNodes.Count; a++)
                            {
                                ATag aFile = aNodes[a] as ATag;
                                if (aFile == null || !aFile.IsAtagAttach())
                                {
                                    continue;
                                }
                                // The link is stored as found, whether absolute or relative.
                                BaseAttach attach = ToolDb.GenBaseAttach(aFile.LinkText, info.Id, aFile.Link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }
                    else
                    {
                        string endDate = string.Empty, specType = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

                        string bidUnit = ctx.GetBidRegex();
                        if (string.IsNullOrWhiteSpace(bidUnit))
                        {
                            bidUnit = ctx.GetRegex("中标人");
                        }
                        string bidMoney  = ctx.GetMoneyRegex();
                        string buildUnit = ctx.GetBuildRegex();
                        if (string.IsNullOrWhiteSpace(buildUnit))
                        {
                            buildUnit = ctx.GetRegex("招标人");
                        }

                        // A code is only kept when it ends with '号'.
                        string code = ctx.GetCodeRegex().GetCodeDel();
                        if (!string.IsNullOrWhiteSpace(code) && code[code.Length - 1] != '号')
                        {
                            code = "";
                        }

                        // Cut everything after the first "公司" in the winner's name.
                        if (!string.IsNullOrWhiteSpace(bidUnit))
                        {
                            int companyIdx = bidUnit.IndexOf("公司", StringComparison.Ordinal);
                            if (companyIdx >= 0)
                            {
                                bidUnit = bidUnit.Remove(companyIdx) + "公司";
                            }
                        }
                        string bidType = ToolHtml.GetInviteTypes(btName);

                        BidInfo info = ToolDb.GenBidInfo("上海市", "上海市区", "", string.Empty, code, btName, buildUnit, btTime, bidUnit, btTime, endDate, ctx, string.Empty, msgType, bidType, specType, otherType, bidMoney, btUrl, prjMgr, HtmlTxt);
                        list.Add(info);

                        Parser   attachParser = new Parser(new Lexer(HtmlTxt));
                        NodeList aNode        = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (aNode != null)
                        {
                            for (int k = 0; k < aNode.Count; k++)
                            {
                                ATag a = aNode[k] as ATag;
                                if (a == null || !a.IsAtagAttach())
                                {
                                    continue;
                                }
                                // NOTE(review): unlike the tender branch, links that do not contain "http" are stored
                                // as an empty string here. Kept as-is; confirm which behavior is intended.
                                string link = a.Link.ToLower().Contains("http") ? a.Link : string.Empty;
                                BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Beispiel #25
0
        /// <summary>
        /// Crawls the award-result grid at SiteUrl (an ASP.NET WebForms page paged through a postback),
        /// downloads each row's detail page, extracts the award fields with regular expressions and
        /// returns the resulting BidInfo records. Files listed in the detail page's attachment grid are
        /// appended to base.AttachList.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl stops as soon as MaxCount records have been collected.</param>
        /// <returns>
        /// Records whose BeginDate lies strictly inside the fixed date window below; the crawl returns
        /// as soon as a record older than the window start is seen.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <BidInfo>();

            // NOTE(review): the date window is hard-coded and the page loop below starts at page 5;
            // both look like leftovers of a one-off backfill. Left unchanged - confirm before relying on this crawler.
            DateTime start = new DateTime(2016, 11, 30);
            DateTime end   = new DateTime(2016, 12, 14);

            int    pageInt   = 1;
            string html      = string.Empty;
            string viewState = string.Empty;
            string cookieStr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookieStr);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser     = new Parser(new Lexer(html));
            NodeList sNode      = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_InfoList2_GridViewPaging1_PagingDescTd"), new TagNameFilter("td")));
            string   pageString = sNode.AsString();
            Match    pageMatch  = new Regex(@",共[^页]+页").Match(pageString);

            // Total page count; stays at 1 when the pager text is missing or not numeric.
            int parsedPages;
            if (int.TryParse(pageMatch.Value.Replace(",共", "").Replace("页", "").Trim(), out parsedPages))
            {
                pageInt = parsedPages;
            }

            // Field patterns do not depend on the row, so they are built once.
            Regex regPrjAdd    = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
            Regex regMoney     = new Regex(@"(中标价):[^\r\n]+[\r\n]{1}");
            Regex regprjMgr    = new Regex(@"(项目经理|项目负责人|项目总监|建造师):[^\r\n]+[\r\n]{1}");
            Regex regBidUnit   = new Regex(@"(中标人|中标单位):[^\r\n]+[\r\n]{1}");
            Regex regOtherType = new Regex(@"(工程类型):[^\r\n]+[\r\n]{1}");

            for (int i = 5; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Jump to page i by replaying the "go to page" postback with the current view state.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                        "ctl00$ScriptManager1",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "ctl00$cph_context$InfoList2$ddlProjectType",
                        "ctl00$cph_context$InfoList2$ddlSearch",
                        "ctl00$cph_context$InfoList2$txtProjectName",
                        "ctl00$cph_context$InfoList2$GridViewPaging1$txtGridViewPagingForwardTo",
                        "__VIEWSTATEENCRYPTED",
                        "ctl00$cph_context$InfoList2$GridViewPaging1$btnForwardToPage"
                    },
                        new string[] {
                        "ctl00$cph_context$InfoList2$update1|ctl00$cph_context$InfoList2$GridViewPaging1$btnForwardToPage",
                        string.Empty,
                        string.Empty,
                        string.Empty,
                        viewState,
                        string.Empty,
                        "gcbh",
                        string.Empty,
                        i.ToString(),
                        "",
                        "GO"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch (Exception)
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_InfoList2_GridView1"), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (same start index as before).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j] as TableRow;

                    // Columns 1..6 are read below; skip rows that are too short instead of aborting the crawl.
                    if (tr == null || tr.ColumnCount < 7)
                    {
                        continue;
                    }

                    string code      = tr.Columns[1].ToPlainTextString().Trim();
                    string prjName   = tr.Columns[2].ToPlainTextString().Trim();
                    string buildUnit = tr.Columns[3].ToPlainTextString().Trim();
                    string InvType   = tr.Columns[4].ToPlainTextString().Trim();
                    string beginDate = tr.Columns[5].ToPlainTextString().Trim();
                    string endDate   = tr.Columns[6].ToPlainTextString().Trim();
                    string bidType   = ToolHtml.GetInviteTypes(InvType);

                    NodeList rowLinks = tr.Columns[2].SearchFor(typeof(ATag), true);
                    ATag     aTag     = (rowLinks != null && rowLinks.Count > 0) ? rowLinks[0] as ATag : null;
                    if (aTag == null)
                    {
                        continue;
                    }
                    string InfoUrl = "http://61.144.224.189:8001/LGjyzxWeb/SiteManage/" + aTag.Link.Replace("openNewWindowByMenu(\"", "").Replace("\")", "");

                    string HtmlTxt    = string.Empty;
                    string htmldetail = string.Empty;
                    try
                    {
                        // The detail page is downloaded once; the stored HTML and the line-broken text
                        // used for field extraction are both derived from that single response.
                        string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");

                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_lblContent"), new TagNameFilter("span")));
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_lblContent"), new TagNameFilter("span")));
                    string   bidCtx    = dtnode.AsString().Replace(" ", "");

                    string prjAddress = regPrjAdd.Match(bidCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
                    string msgType    = "深圳市建设工程交易中心龙岗分中心";
                    string specType   = "建设工程";
                    string bidMoney   = regMoney.Match(bidCtx).Value.Replace("中标价:", "").Replace("万元", "").Trim();

                    // Strip every label the pattern can match, not only "项目经理:".
                    string prjMgr = regprjMgr.Match(bidCtx).Value
                                    .Replace("项目经理:", "").Replace("项目负责人:", "")
                                    .Replace("项目总监:", "").Replace("建造师:", "").Trim();

                    // Strip the label together with its colon for both alternatives.
                    string bidUnit = regBidUnit.Match(bidCtx).Value.Replace("中标人:", "").Replace("中标单位:", "").Trim();

                    // Map the free-text project type onto a category; when several keywords occur, the last check wins.
                    string otherType = string.Empty;
                    string oType     = regOtherType.Match(bidCtx).Value.Replace("工程类型:", "").Trim();
                    if (oType.Contains("房建"))
                    {
                        otherType = "房建及工业民用建筑";
                    }
                    if (oType.Contains("市政"))
                    {
                        otherType = "市政工程";
                    }
                    if (oType.Contains("园林绿化"))
                    {
                        otherType = "园林绿化工程";
                    }
                    if (oType.Contains("装饰装修"))
                    {
                        otherType = "装饰装修工程";
                    }
                    if (oType.Contains("电力"))
                    {
                        otherType = "电力工程";
                    }
                    if (oType.Contains("水利"))
                    {
                        otherType = "水利工程";
                    }
                    if (oType.Contains("环保"))
                    {
                        otherType = "环保工程";
                    }

                    // Over-long matches are discarded rather than stored.
                    if (Encoding.Default.GetByteCount(bidUnit) > 150)
                    {
                        bidUnit = "";
                    }
                    if (Encoding.Default.GetByteCount(prjMgr) > 50)
                    {
                        prjMgr = "";
                    }

                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);

                    // A record older than the window start ends the whole crawl.
                    if (info.BeginDate < start)
                    {
                        return(list);
                    }

                    // Only records strictly inside the window are kept.
                    if (!(info.BeginDate > start && info.BeginDate < end))
                    {
                        continue;
                    }
                    list.Add(info);

                    dtlparser.Reset();
                    NodeList fileNode  = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_AccessoriesControl1_GridView1")));
                    TableTag fileTable = (fileNode != null && fileNode.Count > 0) ? fileNode[0] as TableTag : null;
                    if (fileTable != null)
                    {
                        // Row 0 of the attachment grid is skipped (same start index as before).
                        for (int f = 1; f < fileTable.Rows.Length; f++)
                        {
                            TableRow fileRow = fileTable.Rows[f];
                            if (fileRow.ColumnCount < 1)
                            {
                                continue;
                            }

                            NodeList fileLinks = fileRow.Columns[0].SearchFor(typeof(ATag), true);
                            ATag     fileLink  = (fileLinks != null && fileLinks.Count > 0) ? fileLinks[0] as ATag : null;
                            if (fileLink == null)
                            {
                                continue;
                            }

                            BaseAttach attach = ToolDb.GenBaseAttach(fileRow.Columns[0].ToPlainTextString().Trim(), info.Id, "http://jyzx.cb.gov.cn/LGjyzxWeb/" + fileLink.Link.Replace("../", ""));
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }
Beispiel #26
0
        /// <summary>
        /// Crawls the paginated listing at <c>SiteUrl</c> and builds one <c>BidInfo</c> for every listing row
        /// whose detail page contains a table with <c>cellPadding="4"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first listing page cannot be downloaded;
        /// the method never returns <c>null</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<BidInfo>();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return list;
            }

            // The pager text is read from the first <td width="700"> cell.
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "700")));

            if (nodeList != null && nodeList.Count > 0)
            {
                try
                {
                    // Drop colons, &nbsp; and CJK characters, then parse everything after the first "/" as the page count.
                    string pagerText = Regex.Replace(nodeList[0].ToPlainTextString().Trim().Replace(":", "").Replace(":", "").Replace("&nbsp;", ""), @"[\u4e00-\u9fa5]", "");
                    page = int.Parse(pagerText.Substring(pagerText.IndexOf("/")).Replace("/", "").Trim());
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed, only the first page is crawled.
                }
            }

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?page=" + i.ToString(), Encoding.Default);
                    }
                    catch
                    {
                        // Skip pages that fail to download and carry on with the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellPadding", "5")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = (TableTag)tableNodeList[0];
                for (int j = 0; j < table.RowCount; j++)
                {
                    string prjName = string.Empty,
                           buildUnit = string.Empty, bidUnit = string.Empty,
                           bidMoney = string.Empty, code = string.Empty,
                           beginDate = string.Empty,
                           endDate = string.Empty, bidType = string.Empty,
                           specType = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, bidCtx = string.Empty,
                           prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    // Listing row: column 1 holds the linked title, column 2 the date.
                    TableRow tr = table.Rows[j];
                    beginDate = tr.Columns[2].ToPlainTextString().Trim();
                    prjName   = tr.Columns[1].ToPlainTextString().Replace("&#8226;", "").Trim();
                    ATag aTag = tr.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                    InfoUrl = "http://www.stjs.org.cn/zbtb/" + aTag.Link;

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "");
                    }
                    catch
                    {
                        // Detail page unavailable: skip this record.
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellPadding", "4")));
                    if (dtnode.Count > 0)
                    {
                        HtmlTxt = dtnode.AsHtml();
                        TableTag tableRow = dtnode.SearchFor(typeof(TableTag), true)[0] as TableTag;

                        // Flatten the detail table into "first:second" lines, consuming the cells of each row in pairs.
                        for (int row = 0; row < tableRow.RowCount; row++)
                        {
                            TableRow r = tableRow.Rows[row];

                            for (int k = 0; k < r.ColumnCount; k += 2)
                            {
                                string st  = r.Columns[k].ToPlainTextString().Trim();
                                string st1 = string.Empty;
                                if (k + 1 < r.ColumnCount)
                                {
                                    st1 = r.Columns[k + 1].ToPlainTextString().Trim();
                                }
                                bidCtx += st + ":" + st1 + "\r\n";
                            }
                        }

                        code = bidCtx.GetCodeRegex().GetReplace("/");
                        Regex regBuidUnit = new Regex(@"(招标人|建设单位)(:|:)[^\r\n]+\r\n");
                        buildUnit = regBuidUnit.Match(bidCtx).Value.Replace("招标人:", "").Replace("建设单位:", "").Trim();
                        Regex regBidUnit = new Regex(@"中标单位(:|:)[^\r\n]+\r\n");
                        bidUnit = regBidUnit.Match(bidCtx).Value.Replace("中标单位:", "").Replace("/", "").Trim();

                        bidMoney = bidCtx.GetMoneyRegex();

                        // Keep only the part of the listing title after the last ':'.
                        string[] prjNames = prjName.Split(':');
                        prjName   = prjNames[prjNames.Length - 1];
                        beginDate = beginDate.GetReplace(".", "-");

                        // A project name found in the detail text takes precedence over the listing title.
                        string temp = bidCtx.GetRegex("工程名称", false);
                        if (!string.IsNullOrWhiteSpace(temp))
                        {
                            prjName = temp;
                        }

                        msgType  = "汕头市建设工程交易中心";
                        specType = "建设工程";
                        bidType  = ToolHtml.GetInviteTypes(prjName);

                        prjName = ToolDb.GetPrjName(prjName);
                        BidInfo info = ToolDb.GenBidInfo("广东省", "汕头市区", "", string.Empty, code, prjName, buildUnit, beginDate,
                                                         bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType,
                                                         bidMoney, InfoUrl, prjMgr, HtmlTxt);
                        list.Add(info);
                        if (!crawlAll && list.Count >= this.MaxCount)
                        {
                            return list;
                        }
                    }
                }
            }

            // Return what was collected even when the MaxCount limit was never reached.
            return list;
        }
Beispiel #27
0
        /// <summary>
        /// Crawls the paginated listing at <c>SiteUrl</c> and builds one <c>InviteInfo</c> for every listing row
        /// whose detail page contains a table with <c>cellPadding="4"</c>. Links found inside
        /// <c>&lt;td height="30"&gt;</c> cells of the detail page are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The collected records. The list is empty when the first listing page cannot be downloaded;
        /// the method never returns <c>null</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<InviteInfo>();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    page      = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default, ref cookiestr);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return list;
            }

            // The pager text is read from the <td height="34"> cells.
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "34")));

            if (nodeList != null && nodeList.Count > 0)
            {
                string pageString = nodeList.AsString();
                Regex  regexPage  = new Regex(@"1/[^页]+");
                Match  pageMatch  = regexPage.Match(pageString);
                try
                {
                    page = int.Parse(pageMatch.Value.Replace("1/", "").Replace("下一", ""));
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed, only the first page is crawled.
                    page = 1;
                }
            }

            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "?page=" + i.ToString()), Encoding.Default);
                    }
                    catch
                    {
                        // Skip pages that fail to download and carry on with the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellPadding", "5")));
                if (tableNodeList == null || tableNodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = (TableTag)tableNodeList[0];
                for (int j = 0; j < table.RowCount; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    // Listing row: column 1 holds the linked title, column 2 the date.
                    TableRow tr = table.Rows[j];
                    prjName   = tr.Columns[1].ToPlainTextString().Replace("&#8226;", "").Trim();
                    beginDate = tr.Columns[2].ToPlainTextString().Replace("&nbsp; ", "").Trim();
                    ATag aTag = tr.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                    InfoUrl = "http://www.stjs.org.cn/zbtb/" + aTag.Link;

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).Replace("&nbsp;", "");
                    }
                    catch
                    {
                        // Detail page unavailable: skip this record.
                        continue;
                    }

                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellPadding", "4")));
                    if (dtnode.Count > 0)
                    {
                        HtmlTxt = dtnode.AsHtml();
                        TableTag tableRow = dtnode.SearchFor(typeof(TableTag), true)[0] as TableTag;

                        // Flatten the detail table into "first:second" lines, consuming the cells of each row in pairs.
                        for (int row = 0; row < tableRow.RowCount; row++)
                        {
                            TableRow r = tableRow.Rows[row];

                            for (int k = 0; k < r.ColumnCount; k += 2)
                            {
                                string st  = r.Columns[k].ToPlainTextString().Trim();
                                string st1 = string.Empty;
                                if (k + 1 < r.ColumnCount)
                                {
                                    st1 = r.Columns[k + 1].ToPlainTextString().Trim();
                                }
                                inviteCtx += st + ":" + st1 + "\r\n";
                            }
                        }

                        code = inviteCtx.GetCodeRegex().GetReplace("/");
                        Regex regBuidUnit = new Regex(@"(招标人|建设单位)(:|:)[^\r\n]+\r\n");
                        buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("招标人:", "").Replace("建设单位:", "").Trim();

                        // Strip both labels the pattern can match, so neither one leaks into the address value.
                        Regex regPrjAddr = new Regex(@"(工程地点|项目地址)(:|:)[^\r\n]+\r\n");
                        prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("项目地址:", "").Trim();

                        // Remove Office XML namespace residue (and every ':') from the stored context text.
                        inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Replace(":", "").Trim();
                        inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Replace(":", "").Trim();
                        msgType    = "汕头市建设工程交易中心";
                        specType   = "建设工程";

                        // Keep only the part of the listing title after the last ':'.
                        string[] prjNames = prjName.Split(':');
                        prjName   = prjNames[prjNames.Length - 1];
                        beginDate = beginDate.GetReplace(".", "-");

                        // A project name found in the detail text takes precedence over the listing title.
                        string temp = inviteCtx.GetRegex("工程名称", false);
                        if (!string.IsNullOrWhiteSpace(temp))
                        {
                            prjName = temp;
                        }
                        inviteType = ToolHtml.GetInviteTypes(prjName);
                        InviteInfo info = ToolDb.GenInviteInfo("广东省", "汕头市区", "",
                                                               string.Empty, code, prjName, prjAddress, buildUnit,
                                                               beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                        list.Add(info);

                        // Second pass over the same detail page: collect attachment links.
                        parserdetail.Reset();
                        NodeList fileNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "30")), true), new TagNameFilter("a")));
                        for (int f = 0; f < fileNode.Count; f++)
                        {
                            ATag       fileLink = fileNode[f] as ATag;
                            BaseAttach attach   = ToolDb.GenBaseAttach(fileLink.LinkText, info.Id, "http://www.stjs.org.cn/zbtb/" + fileLink.Link);
                            base.AttachList.Add(attach);
                        }

                        if (!crawlAll && list.Count >= this.MaxCount)
                        {
                            return list;
                        }
                    }
                }
            }

            // Return what was collected even when the MaxCount limit was never reached.
            return list;
        }
Beispiel #28
0
        /// <summary>
        /// Crawls the paginated listing reached by appending a page number to <c>SiteUrl</c> and builds one
        /// <c>BidInfo</c> per <c>&lt;li&gt;</c> entry inside the <c>div.column-info-list</c> container.
        /// Attachment links found in the detail page body are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The collected records; empty when the first listing page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<BidInfo>();
            // Page count; stays at 1 when the pager cannot be parsed.
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "1", Encoding.UTF8);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return list;
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "paging"), new TagNameFilter("div")));

            if (sNode != null && sNode.Count > 0)
            {
                string temp = sNode[0].ToNodePlainString();
                try
                {
                    // The page count is the text between "/" and "转到" in the pager.
                    temp    = temp.GetRegexBegEnd("/", "转到");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + i, Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip pages that fail to download and carry on with the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new HasAttributeFilter("class", "column-info-list"), new TagNameFilter("div")), true), new TagNameFilter("li")));
                if (sNode == null || sNode.Count == 0)
                {
                    continue;
                }

                for (int t = 0; t < sNode.Count; t++)
                {
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                           code = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                           bidCtx = string.Empty, prjAddress = string.Empty, prjMgr = string.Empty,
                           otherType = string.Empty, HtmlTxt = string.Empty;
                    ATag aTag = sNode[t].GetATag();
                    prjName   = aTag.LinkText.ToNodeString();
                    InfoUrl   = "http://ggzy.zhaoqing.gov.cn" + aTag.Link;
                    beginDate = sNode[t].ToPlainTextString().GetDateRegex();

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this record.
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    if (dtnode == null || dtnode.Count == 0)
                    {
                        continue;
                    }

                    HtmlTxt = dtnode.AsHtml();
                    bidCtx  = HtmlTxt.ToCtxString();

                    bidUnit  = bidCtx.GetBidRegex();
                    bidMoney = bidCtx.GetMoneyRegex();

                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        // Fallback: rebuild "first:second" lines from a table and retry the extraction on them.
                        // A table with border="1" is preferred; otherwise the first table of any kind is used.
                        dtlparser = new Parser(new Lexer(HtmlTxt));
                        NodeList tableNode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("border", "1")));
                        if (tableNode == null || tableNode.Count < 1)
                        {
                            dtlparser.Reset();
                            tableNode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        }
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            string   ctx   = string.Empty;
                            TableTag table = tableNode[0] as TableTag;

                            // Guard against a non-table node and an empty table before inspecting row 0.
                            if (table != null && table.RowCount > 0 && table.Rows[0].ColumnCount >= 2)
                            {
                                // Row 0 is skipped by the loop below.
                                for (int j = 1; j < table.RowCount; j++)
                                {
                                    // Rows with fewer than two cells (e.g. merged cells) cannot supply a pair.
                                    if (table.Rows[j].ColumnCount < 2)
                                    {
                                        continue;
                                    }
                                    ctx += table.Rows[j].Columns[0].ToNodePlainString() + ":";
                                    ctx += table.Rows[j].Columns[1].ToNodePlainString() + "\r\n";
                                }
                                bidUnit = ctx.GetBidRegex();
                                if (string.IsNullOrWhiteSpace(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex("单位名称,第一中标候选人");
                                }
                                bidMoney = ctx.GetMoneyRegex();
                                prjMgr   = ctx.GetMgrRegex();
                            }
                        }
                    }
                    buildUnit  = bidCtx.GetBuildRegex();
                    prjAddress = bidCtx.GetAddressRegex();
                    code       = bidCtx.GetCodeRegex();

                    msgType  = "肇庆市公共资源交易中心";
                    specType = "建设工程";

                    prjName = ToolDb.GetPrjName(prjName);
                    bidType = ToolHtml.GetInviteTypes(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "肇庆市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    list.Add(info);

                    // Collect attachment links from the detail page body.
                    dtlparser = new Parser(new Lexer(HtmlTxt));
                    NodeList aNode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int a = 0; a < aNode.Count; a++)
                        {
                            ATag fileTag = aNode[a] as ATag;
                            if (fileTag == null || !fileTag.IsAtagAttach())
                            {
                                continue;
                            }

                            string url = string.Empty;
                            if (fileTag.Link.Contains("http"))
                            {
                                url = fileTag.Link;
                            }
                            else
                            {
                                // Relative links are resolved against SiteUrl plus the first six characters of the
                                // date with "-" removed. If the date is missing or too short, skip the attachment
                                // rather than let Substring throw and abort the whole crawl.
                                string datePart = beginDate.GetReplace("-");
                                if (string.IsNullOrEmpty(datePart) || datePart.Length < 6)
                                {
                                    continue;
                                }
                                url = this.SiteUrl + datePart.Substring(0, 6) + fileTag.Link.GetReplace("./", "/");
                            }
                            BaseAttach item = ToolDb.GenBaseAttach(fileTag.LinkText, info.Id, url);

                            base.AttachList.Add(item);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Beispiel #29
0
        /// <summary>
        /// Crawls the listing at <c>SiteUrl</c> (further pages come from
        /// <c>http://www.szzdzb.cn/Product-index-id-8-p-{n}.html</c>) and builds one <c>InviteInfo</c> per
        /// listing row. Links inside <c>&lt;table bgColor="#CCCCCC"&gt;</c> on the detail page whose target
        /// contains ".DOC" (case-insensitive) are added to <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>The collected records; empty when the first listing page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<InviteInfo>();
            // Page count; stays at 1 when the pager cannot be parsed.
            int    pageInt = 1;
            string html    = string.Empty;

            // Loop-invariant patterns, built once instead of once per listing row.
            Regex regexHtml    = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            Regex regCtx       = new Regex(@"([\r\n]+)|([\t]+)");
            Regex regBeginDate = new Regex(@"发布时间:[^\r\n]+\r\n");

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first listing page there is nothing to crawl.
                return list;
            }

            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("bgColor", "#EEF4F9")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                string pageTemp = tdNodes.AsString().Replace("&nbsp;", "").Replace(" ", "").Trim();
                Regex  regpage  = new Regex(@"1/[0-9]+页");
                try
                {
                    pageInt = int.Parse(regpage.Match(pageTemp).Value.Split('/')[1].Replace("页", "").Trim());
                }
                catch
                {
                    // Best effort: if the pager cannot be parsed, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.szzdzb.cn/Product-index-id-8-p-" + i + ".html", Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip pages that fail to download and carry on with the next one.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "hangao27"))), new TagNameFilter("table")));

                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    // The matched node is not a table; nothing to read on this page.
                    continue;
                }

                // Row 0 is skipped by the loop below.
                for (int j = 1; j < table.RowCount; j++)
                {
                    string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                    TableRow tr = table.Rows[j];
                    code    = tr.Columns[0].ToPlainTextString().Trim();
                    prjName = tr.Columns[1].ToPlainTextString().Trim();
                    ATag aTag = tr.Columns[1].SearchFor(typeof(ATag), true)[0] as ATag;
                    InfoUrl = "http://www.szzdzb.cn" + aTag.Link;

                    string htmldetail = string.Empty;
                    try
                    {
                        // Download the detail page once and derive both variants from the same response:
                        // the stored HTML snapshot, and a line-broken, script-free copy used for text extraction.
                        string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");

                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "hangao27"))), new TagNameFilter("table")));
                        HtmlTxt = dtnodeHTML.AsHtml();

                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                        htmldetail = regexHtml.Replace(htmldetail, "");
                    }
                    catch
                    {
                        // Detail page unavailable or unparsable: skip this record.
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "hangao27"))), new TagNameFilter("table")));

                    // Decode entities, drop the print/close footer and collapse runs of line breaks or tabs.
                    inviteCtx = System.Web.HttpUtility.HtmlDecode(dtnode.AsString().Replace("打印本页 || 关闭窗口", ""));
                    inviteCtx = regCtx.Replace(inviteCtx, "\r\n");
                    beginDate  = regBeginDate.Match(inviteCtx).Value.Replace("发布时间", "").Replace(":", "").Trim();
                    specType   = "其他";
                    msgType    = "深圳市振东招标代理有限公司";
                    inviteType = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    // Second pass over the same detail page: collect attachment links.
                    dtlparser.Reset();
                    dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("bgColor", "#CCCCCC")));
                    NodeList FileTag = dtnode.SearchFor(typeof(ATag), true);
                    if (FileTag != null && FileTag.Count > 0)
                    {
                        for (int f = 0; f < FileTag.Count; f++)
                        {
                            ATag file = FileTag[f] as ATag;
                            // Ordinal, case-insensitive match so the result does not depend on the current culture.
                            if (file.Link.IndexOf(".DOC", StringComparison.OrdinalIgnoreCase) >= 0)
                            {
                                BaseAttach attach = ToolDb.GenBaseAttach(file.ToPlainTextString(), info.Id, "http://www.szzdzb.cn" + file.Link);
                                base.AttachList.Add(attach);
                            }
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }

            return list;
        }
Beispiel #30
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
                Regex regexHtml = new Regex(@"<script[^<]*</script>");
                htl = regexHtml.Replace(htl, "");
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("colSpan", "6")));

            if (nodeList != null && nodeList.Count > 0)
            {
                Regex regexPage = new Regex(@"共\d+页");
                page = int.Parse(regexPage.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' }));
            }
            for (int i = 1; i < page; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "key",
                        "AxGridView1$ctl23$ctl07",
                        "AxGridView1$ctl23$pageList",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        "AxGridView1$ctl23$ctl03",
                        string.Empty,
                        viewState,
                        string.Empty,
                        "20",
                        (i - 1).ToString(),
                        string.Empty,
                        eventValidation
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr);
                    }
                    catch (Exception ex) { continue; }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "AxGridView1")));
                if (tableNodeList != null && tableNodeList.Count > 0)
                {
                    TableTag table = (TableTag)tableNodeList[0];
                    for (int j = 1; j < table.RowCount - 1; j++)
                    {
                        string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                               prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                               specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                               remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty,
                               CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                        TableRow tr = table.Rows[j];
                        code    = tr.Columns[2].ToPlainTextString().Trim();
                        prjName = tr.Columns[3].ToPlainTextString().Trim();
                        //endDate = tr.Columns[4].ToPlainTextString().Replace("&nbsp; ", "").Trim().Substring(0, 10);
                        ATag aTag = tr.Columns[5].SearchFor(typeof(ATag), true)[0] as ATag;
                        InfoUrl = "http://www.yjgcjy.cn/" + aTag.Link;
                        string htmldetail = string.Empty;
                        try
                        {
                            htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("&nbsp;", "");
                        }
                        catch (Exception)
                        {
                            Logger.Error("InviteYJYXJS");
                            continue;
                        }
                        Parser   parserdetail = new Parser(new Lexer(htmldetail));
                        NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                        if (dtnode != null && dtnode.Count > 0)
                        {
                            HtmlTxt = dtnode.AsHtml();
                            TableTag tableRow = (TableTag)dtnode[0];
                            for (int k = 1; k < tableRow.RowCount; k++)
                            {
                                TableRow trow = tableRow.Rows[k];
                                for (int c = 0; c < trow.ColumnCount; c++)
                                {
                                    string tr1 = string.Empty;
                                    tr1        = trow.Columns[c].ToPlainTextString().Trim();
                                    inviteCtx += tr1;
                                }
                                inviteCtx += "\r\n";
                            }
                            Regex regPrjAddr = new Regex(@"工程建设地址:[^\r\n]+\r\n");
                            try
                            {
                                prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程建设地址", "").Replace(":", "").Replace("。", "").Replace("、", "").Replace(";", "").Replace(",", "").Trim();
                                if (Encoding.Default.GetByteCount(prjAddress) > 200 || prjAddress == "")
                                {
                                    prjAddress = "见招标详细信息";
                                }
                            }
                            catch (Exception)
                            {
                                prjAddress = "见招标详细信息";
                            }
                            Regex regBegin = new Regex(@"公告发布时间:[^\r\n]+[\r\n]{1}");
                            beginDate = regBegin.Match(inviteCtx).Value.Replace("公告发布时间:", "").Trim();
                            string date    = beginDate.Replace(" ", "").Trim();
                            Regex  regDate = new Regex(@"\d{4}年\d{1,2}月\d{1,2}日");
                            beginDate = regDate.Match(date).Value.Trim();
                            if (beginDate == "")
                            {
                                Regex regDateT = new Regex(@"[u4e00-u9fa5]{4}年[u4e00-u9fa5]{1,2}月[u4e00-u9fa5]{1,2}日");
                                beginDate = regDateT.Match(inviteCtx).Value.Replace("公告发布时间:", "").Trim();
                            }
                            if (beginDate == "")
                            {
                                beginDate = string.Empty;
                            }
                            Regex bildUnit = new Regex(@"建设单位:[^\r\n]+[\r\n]{1}");
                            buildUnit = bildUnit.Match(inviteCtx).Value.Replace("建设单位:", "").Trim();
                            if (buildUnit == "")
                            {
                                buildUnit = "";
                            }
                            msgType    = "阳江市建设工程交易中心";
                            specType   = "建设工程";
                            inviteType = ToolHtml.GetInviteTypes(prjName);
                            inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                            inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = ns0 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                            inviteCtx  = inviteCtx.Replace("xml:namespace prefix = st1", "").Trim();
                            inviteCtx  = inviteCtx.Replace("点击进入留言", "").Trim();
                            code       = code.Replace(";", "").Replace(":", "").Trim();
                            InviteInfo info = ToolDb.GenInviteInfo("广东省", "阳江市区", "阳西县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                            list.Add(info);
                            parserdetail.Reset();
                            NodeList fileNode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                            if (fileNode != null && fileNode.Count > 0 && fileNode[0] is TableTag)
                            {
                                TableTag fileTable = fileNode[0] as TableTag;
                                for (int f = 10; f < fileTable.RowCount; f++)
                                {
                                    TableRow trowFile = fileTable.Rows[f];
                                    for (int z = 0; z < 1; z++)
                                    {
                                        string tr1 = string.Empty;
                                        tr1 = trowFile.Columns[z].ToPlainTextString().Trim();
                                        if (tr1.Contains("下载招标文件:") || tr1.Contains("下载工程量清单:") || tr1.Contains("下载图纸:"))
                                        {
                                            if (fileTable.Rows[f].Columns[z + 1].ToPlainTextString().Trim() != "")
                                            {
                                                int tt = fileTable.Rows[f].Columns[z + 1].SearchFor(typeof(ATag), true).Count;
                                                for (int ii = 0; ii < tt; ii++)
                                                {
                                                    string st3       = fileTable.Rows[f].Columns[z + 1].SearchFor(typeof(ATag), true)[ii].ToPlainTextString().Trim();
                                                    ATag   aTagCh    = fileTable.Rows[f].Columns[z + 1].SearchFor(typeof(ATag), true)[ii] as ATag;
                                                    string urlValues = "http://www.yjgcjy.cn" + aTagCh.Link;
                                                    if (aTagCh.Link.Contains("http://www.yjgcjy.cn"))
                                                    {
                                                        urlValues = aTagCh.Link;
                                                    }
                                                    if (st3 != "")
                                                    {
                                                        BaseAttach attach = ToolDb.GenBaseAttach(st3, info.Id, urlValues);
                                                        base.AttachList.Add(attach);
                                                    }
                                                }
                                            }
                                        }
                                        else
                                        {
                                            continue;
                                        }
                                    }
                                }
                            }
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }

                        else
                        {
                            code = "";
                            Parser   parserdetailtwo = new Parser(new Lexer(htmldetail));
                            NodeList dtnodetwo       = parserdetailtwo.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "nr")));
                            if (dtnodetwo != null && dtnodetwo.Count > 0)
                            {
                                HtmlTxt   = dtnodetwo.AsHtml();
                                inviteCtx = dtnodetwo.AsString().Replace("。", "").Trim();
                                Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                                inviteCtx = regexHtml.Replace(inviteCtx, "").Replace("O", "〇");
                                Regex regPrjAddr = new Regex(@"(工程建设地点|工程地点):[^\r\n]+\r\n");
                                prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程建设地点", "").Replace("工程地点", "").Replace(":", "").Trim();
                                if (prjAddress == "")
                                {
                                    prjAddress = "见招标详细信息";
                                }
                                Regex regDateT = new Regex(@"[^u4e00-u9fa5]{4}年[^u4e00-u9fa5]{1,3}月[^u4e00-u9fa5]{1,3}日");
                                beginDate = regDateT.Match(inviteCtx).Value.Trim();
                                beginDate = returnS(beginDate);
                                if (beginDate == "")
                                {
                                    beginDate = string.Empty;
                                }
                                Regex bildUnit = new Regex(@"发包人:[^\r\n]+[\r\n]{1}");
                                buildUnit = bildUnit.Match(inviteCtx).Value.Replace("发包人:", "").Trim();
                                if (buildUnit == "")
                                {
                                    buildUnit = "";
                                }
                                msgType    = "阳江市建设工程交易中心";
                                specType   = "建设工程";
                                inviteType = ToolHtml.GetInviteTypes(prjName);
                                inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim();
                                inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = ns0 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                                inviteCtx  = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim();
                                inviteCtx  = inviteCtx.Replace("xml:namespace prefix = st1", "").Trim();
                                inviteCtx  = inviteCtx.Replace("点击进入留言", "").Trim();
                                inviteCtx  = inviteCtx.Replace("〇", "0");
                                InviteInfo info = ToolDb.GenInviteInfo("广东省", "阳江市区", "阳西县", string.Empty, code, prjName,
                                                                       prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType,
                                                                       inviteType, specType, otherType, InfoUrl, HtmlTxt);
                                list.Add(info);
                                if (!crawlAll && list.Count >= this.MaxCount)
                                {
                                    return(list);
                                }
                            }
                        }
                    }
                }
            }
            return(null);
        }