예제 #1
0
        /// <summary>
        /// Selects elements from <paramref name="html"/> according to the criteria in
        /// <paramref name="model"/> and returns their HTML.
        /// </summary>
        /// <remarks>
        /// Original author's notes: the OR and AND filters each accept only two operands at a time,
        /// and this method may later be reworked to rely on filters only.
        /// </remarks>
        /// <param name="html">HTML source to search.</param>
        /// <param name="model">
        /// Selection criteria: element type (<c>EType</c>), optional <c>ID</c> and <c>CSS</c> attribute
        /// values, the <c>AllowScript</c> flag and optional <c>Start</c>/<c>End</c> markers.
        /// </param>
        /// <returns>
        /// The result of <c>GetTitle</c> when the element type is "title"; otherwise the HTML of the
        /// matching nodes, reduced by <c>regHelper.GetValueBySE</c> when both markers are set.
        /// </returns>
        public string GetByFilter(string html, FilterModel model)
        {
            // Ordinal, case-insensitive comparison: the match must not depend on the current culture
            // (a culture-sensitive ToLower() maps "TITLE" differently under e.g. Turkish rules).
            if (model.EType.Equals("title", System.StringComparison.OrdinalIgnoreCase))
            {
                return(GetTitle(html));
            }

            NodeList nodes = GetTagList(html, model.EType);

            if (!string.IsNullOrEmpty(model.ID))
            {
                nodes = nodes.ExtractAllNodesThatMatch(new HasAttributeFilter("id", model.ID));
            }
            if (!string.IsNullOrEmpty(model.CSS))
            {
                nodes = nodes.ExtractAllNodesThatMatch(new HasAttributeFilter("class", model.CSS));
            }
            if (!model.AllowScript)
            {
                // NOTE(review): the extracted list is discarded, so this call does not appear to
                // strip <script> nodes from `nodes` -- confirm the intended behaviour.
                nodes.ExtractAllNodesThatMatch(new TagNameFilter("script"), true);
            }

            // Localise images: download each one and point the tag at the value DownFile returns.
            NodeList imgs = nodes.ExtractAllNodesThatMatch(new TagNameFilter("img"), true);
            for (int i = 0; i < imgs.Count; i++)
            {
                ImageTag img = imgs[i] as ImageTag;
                if (img == null)
                {
                    continue; // node matched the tag name but is not an ImageTag; nothing to rewrite
                }

                string savepath = function.VToP(vdir + Path.GetFileName(img.ImageURL));
                if (File.Exists(savepath))
                {
                    continue; // already on disk: avoid downloading the same image twice
                }
                img.ImageURL = httpHelp.DownFile(baseurl, img.ImageURL, savepath);
            }

            string result = nodes.AsHtml();
            if (!string.IsNullOrWhiteSpace(model.Start) && !string.IsNullOrWhiteSpace(model.End))
            {
                result = regHelper.GetValueBySE(result, model.Start, model.End);
            }
            return(result);
        }
예제 #2
0
        /// <summary>
        /// Parses the staff listing table (id "dgConstBid") in <paramref name="html"/>, fetches each
        /// person's detail page and saves one <c>CorpStaff</c> entity per six-column row.
        /// </summary>
        /// <param name="url">Not used by this method.</param>
        /// <param name="list">Not used by this method (entities are saved directly).</param>
        /// <param name="html">HTML of the listing page.</param>
        /// <param name="crawlAll">Not used by this method.</param>
        private void GetCorpStaffSzjsjMethod(string url, IList list, string html, bool crawlAll)
        {
            Parser   parser = new Parser(new Lexer(html));
            NodeList aNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid")));

            if (aNodes != null && aNodes.Count == 1 && aNodes[0] is TableTag)
            {
                TableTag table = (TableTag)aNodes[0];
                // Row 0 is skipped (presumably the header row).
                for (int i = 1; i < table.Rows.Length; i++)
                {
                    if (table.Rows[i].Columns.Length != 6)
                    {
                        continue;
                    }

                    // The name cell carries the link to the detail page; skip rows without one
                    // instead of failing on a null anchor.
                    NodeList nameCellChildren = table.Rows[i].Columns[1].Children;
                    NodeList anchors          = nameCellChildren == null ? null : nameCellChildren.SearchFor(typeof(ATag), true);
                    ATag     anchor           = (anchors != null && anchors.Count > 0) ? anchors[0] as ATag : null;
                    if (anchor == null || anchor.Link == null)
                    {
                        continue;
                    }

                    string Name       = table.Rows[i].Columns[1].ToPlainTextString().Trim().Replace("&nbsp;", "");
                    string CorpName   = table.Rows[i].Columns[2].ToPlainTextString().Trim().Replace("&nbsp;", "");
                    string PersonType = table.Rows[i].Columns[3].ToPlainTextString().Trim().Replace("&nbsp;", "");
                    string CertCode   = table.Rows[i].Columns[4].ToPlainTextString().Trim().Replace("&nbsp;", "");
                    string Profession = table.Rows[i].Columns[5].ToPlainTextString().Trim().Replace("&nbsp;", "");

                    // The link has the form GoDetail('<id>'); -- strip the wrapper to get the id.
                    string idnum = anchor.Link.Replace("GoDetail('", "").Replace("');", "");
                    // Cleaned variant of the id; only used in the error log below.
                    string IdNum = idnum.Replace("&am", "").Replace("&a", "").Replace("p;c", "").Replace("cate", "").Replace("cat", "").Replace("ate", "");
                    string Url   = "http://61.144.226.2/ryxx/Detail_LWDZ.aspx?ID_NUMBER=" + idnum;

                    string ctxhtml;
                    try
                    {
                        ctxhtml = ToolWeb.GetHtmlByUrl(Url, Encoding.Default);
                    }
                    catch (Exception ex)
                    {
                        // Log the person's name under the name label (the company name was logged here before).
                        Logger.Error("人员姓名:" + Name + ",证件号:" + IdNum + "所在单位:" + CorpName + "," + Url + ";" + ex);
                        continue;
                    }

                    Parser   parserCtx = new Parser(new Lexer(ctxhtml));
                    NodeList ctxNode   = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("borderColor", "#cccccc")));
                    string   text      = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("TD"), new HasAttributeFilter("width", "76%")), true).AsString().Replace("&nbsp;", "");

                    // The company code defaults to the company name; the last match on the detail page wins.
                    string CorpCode = CorpName;
                    foreach (Match m in Regex.Matches(text, "任职企业编号:.*?\r\n"))
                    {
                        CorpCode = m.ToString().Replace("任职企业编号:", "").Replace("\r\n", "");
                    }

                    CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, string.Empty, string.Empty, string.Empty, CorpName, CorpCode, CertCode, string.Empty, string.Empty, string.Empty, PersonType, string.Empty, "广东省", "深圳市区", "深圳市住房和建设局", Url, Profession, "", "", "", "");
                    ToolDb.SaveEntity(corpStaff, this.ExistCompareFields);
                }
                parser.Reset();
            }
        }
예제 #3
0
        /// <summary>
        /// Returns the HTML of the page body after removing script nodes from the body node list.
        /// </summary>
        /// <param name="html">HTML source of the page.</param>
        /// <returns>The body node list rendered through <c>ToHtml()</c>.</returns>
        public string GetBodyHtml(string html)
        {
            NodeList body    = GetPage(html).Body;
            NodeList scripts = body.ExtractAllNodesThatMatch(new TagNameFilter("script"), true);

            // Remove every script node found by the recursive search from the body list.
            int index = 0;
            while (index < scripts.Size())
            {
                body.Remove(scripts[index]);
                index++;
            }

            return(body.ToHtml());
        }
예제 #4
0
        /// <summary>
        /// Downloads a page, parses it and returns the nodes accepted by a filter.
        /// </summary>
        /// <param name="url">Address passed to <c>WebCapture.CapturePage</c>.</param>
        /// <param name="filter">Filter applied recursively to the parsed node list.</param>
        /// <returns>The nodes of the parsed page that match <paramref name="filter"/>.</returns>
        public static NodeList GetNodeList(string url, NodeFilter filter)
        {
            // Fetch the page source and feed it to the parser.
            Parser parser = new Parser(new Lexer(WebCapture.CapturePage(url)));

            // Parse everything (no filter), then apply the caller's filter recursively.
            return(parser.Parse(null).ExtractAllNodesThatMatch(filter, true));
        }
예제 #5
0
        /// <summary>
        /// Fetches the page at <c>zoudiUrl</c> and builds the list of matches from its "menuRow" divs.
        /// </summary>
        /// <returns>
        /// The matches collected so far. Any exception raised while fetching or parsing is swallowed,
        /// so the list may be empty or partial.
        /// </returns>
        public List <OddsLiveMatch> GetScrollMatchList()
        {
            List <OddsLiveMatch> matches = new List <OddsLiveMatch>();

            try
            {
                HttpHelper http           = new HttpHelper();
                Cookie     languageCookie = new Cookie("lng", "2");
                languageCookie.Domain = domain;
                http.CookieContainer.Add(languageCookie);

                string pageHtml = http.GetHtml("https://" + domain + "/default.aspx" + zoudiUrl);
                if (string.IsNullOrEmpty(pageHtml))
                {
                    return(matches);
                }

                // Walk HTML > BODY > FORM > first DIV, which must be the "PageBody" container.
                Parser   parser    = new Parser(new Lexer(pageHtml));
                NodeList htmlNodes = parser.Parse(new TagNameFilter("HTML"));
                NodeList bodyNodes = htmlNodes[0].Children.ExtractAllNodesThatMatch(new TagNameFilter("BODY"))[0].Children;
                NodeList formNodes = bodyNodes.ExtractAllNodesThatMatch(new TagNameFilter("FORM"));
                ITag     container = formNodes[0].Children.ExtractAllNodesThatMatch(new TagNameFilter("DIV"))[0] as ITag;
                if (!container.Attributes["ID"].Equals("PageBody"))
                {
                    return(matches);
                }

                NodeList divs = container.Children.SearchFor(typeof(Winista.Text.HtmlParser.Tags.Div));
                if (divs[0].ToPlainTextString() != "走地盤")
                {
                    return(matches);
                }
                if (divs[2].ToPlainTextString() == "全場賽果")
                {
                    // This is a single-match result page, not a match list.
                    return(matches);
                }

                for (int index = 0; index < divs.Count; index++)
                {
                    ITag   div      = divs[index] as ITag;
                    object cssClass = div.Attributes["CLASS"];
                    if (cssClass == null || !cssClass.Equals("menuRow"))
                    {
                        continue;
                    }

                    OddsLiveMatch entry = new OddsLiveMatch();
                    entry.urlparams = (div.FirstChild as ITag).Attributes["HREF"].ToString();
                    // The id is the first '&'-separated part of the link with its first four characters dropped.
                    entry.id   = entry.urlparams.Split('&')[0].Substring(4);
                    entry.time = DateTime.Now;
                    entry.name = div.ToPlainTextString();
                    matches.Add(entry);
                }
            }
            catch (Exception)
            {
                // Best effort: return whatever was collected before the failure.
            }
            return(matches);
        }
예제 #6
0
        /// <summary>
        /// Selects nodes from the page body with a simplified, space-separated selector string.
        /// Example: <c>"#test_div span .need [disabled='disabled']"</c>.
        /// </summary>
        /// <remarks>
        /// Each space-separated part narrows the previous result with a recursive search:
        /// <c>#x</c> matches attribute id, <c>.x</c> matches attribute class, <c>[name='value']</c>
        /// matches an arbitrary attribute, anything else is treated as a tag name.
        /// Compound parts such as <c>div[name='test']</c> and the <c>&gt;</c> combinator are not
        /// supported, and because parts are split on spaces an attribute value cannot contain one.
        /// The original author notes that id/class matching tested as case-insensitive.
        /// </remarks>
        /// <param name="html">HTML string; it needs to be wrapped in an html tag.</param>
        /// <param name="queryStr">Selector string in the jQuery-like syntax described above.</param>
        /// <returns>The nodes left after applying every selector part in order.</returns>
        public static NodeList GetDomsFromHtml(string html, string queryStr)
        {
            HtmlPage page = GetPage(html);
            NodeList doms = page.Body;

            string[] queryArr = queryStr.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
            foreach (string query in queryArr)
            {
                if (query.StartsWith("#", StringComparison.Ordinal))
                {
                    string id = query.Replace("#", "");
                    doms = doms.ExtractAllNodesThatMatch(new HasAttributeFilter("id", id), true);
                }
                else if (query.StartsWith(".", StringComparison.Ordinal))
                {
                    string css = query.Replace(".", "");
                    doms = doms.ExtractAllNodesThatMatch(new HasAttributeFilter("class", css), true);
                }
                else if (query.StartsWith("[", StringComparison.Ordinal))
                {
                    // "[name='test']" -> name / test. Drop the brackets, split on the FIRST '=' only
                    // (values may contain '='), and strip quotes only when they are actually present,
                    // so unquoted and double-quoted values are no longer truncated.
                    string selector = query.Substring(1);
                    if (selector.EndsWith("]", StringComparison.Ordinal))
                    {
                        selector = selector.Substring(0, selector.Length - 1);
                    }

                    int    separator = selector.IndexOf('=');
                    string name      = separator < 0 ? selector : selector.Substring(0, separator);
                    // No '=': value stays null. NOTE(review): a null value is expected to make
                    // HasAttributeFilter test for attribute presence only -- confirm against the library.
                    string value = null;
                    if (separator >= 0)
                    {
                        value = selector.Substring(separator + 1);
                        bool quoted = value.Length >= 2
                                      && (value[0] == '\'' || value[0] == '"')
                                      && value[value.Length - 1] == value[0];
                        if (quoted)
                        {
                            value = value.Substring(1, value.Length - 2);
                        }
                    }
                    doms = doms.ExtractAllNodesThatMatch(new HasAttributeFilter(name, value), true);
                }
                else
                {
                    // Anything else is a tag name.
                    doms = doms.ExtractAllNodesThatMatch(new TagNameFilter(query), true);
                }
            }
            return(doms);
        }
예제 #7
0
        /// <summary>
        /// Parses the credit ranking table (id "GridView1") in <paramref name="html"/> and saves one
        /// <c>CorpCreditss</c> entity per data row.
        /// </summary>
        /// <param name="list">Not used by this method (entities are saved directly).</param>
        /// <param name="html">HTML of the ranking page.</param>
        /// <param name="crawlAll">Not used by this method.</param>
        /// <param name="ddlIndex">Value of the option in select "DropDownList2" whose text is used as the classification.</param>
        /// <param name="dateTime">Date passed through to <c>ToolDb.GenCorpCreditSS</c>.</param>
        public void DealHtml(IList list, string html, bool crawlAll, string ddlIndex, DateTime dateTime)
        {
            Parser   parserCtx      = new Parser(new Lexer(html));
            NodeList ctxNode        = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "DropDownList2")));
            string   classification = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("option"), new HasAttributeFilter("value", ddlIndex)), true).AsString().Replace("&nbsp;", "");

            parserCtx.Reset();

            Parser   parserDtl = new Parser(new Lexer(html));
            NodeList aNodes    = parserDtl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));

            if (aNodes == null || aNodes.Count != 1 || !(aNodes[0] is TableTag))
            {
                return;
            }

            TableTag table = (TableTag)aNodes[0];
            // Row 0 is skipped (presumably the header row).
            for (int i = 1; i < table.Rows.Length; i++)
            {
                // Columns 1..4 are read below; skip rows that are too short instead of throwing
                // an index error that would abort the whole page.
                if (table.Rows[i].Columns.Length < 5)
                {
                    continue;
                }

                string corpName = CleanGridCellText(table.Rows[i].Columns[1].ToPlainTextString());
                string corpType = CleanGridCellText(table.Rows[i].Columns[2].ToPlainTextString());

                int     corpAllRanking;
                int     classificationRanking;
                decimal realScore;
                try
                {
                    // NOTE(review): both rankings are read from column 3, exactly as before --
                    // confirm whether the classification ranking should come from another column.
                    string rankingText = CleanGridCellText(table.Rows[i].Columns[3].ToPlainTextString());
                    corpAllRanking        = int.Parse(rankingText);
                    classificationRanking = int.Parse(rankingText);
                    realScore             = decimal.Parse(CleanGridCellText(table.Rows[i].Columns[4].ToPlainTextString()));
                }
                catch (Exception ex)
                {
                    Logger.Error(ex + "企业名称:" + corpName);
                    continue;
                }

                CorpCreditss info = ToolDb.GenCorpCreditSS(corpName, corpType, classification, corpAllRanking, classificationRanking, 0, 0, 0, 0, 0, 0, dateTime, realScore, "广东省", "深圳市", DateTime.Now, "深圳市住房和建设局", SiteUrl);
                if (info != null && !string.IsNullOrEmpty(info.CorpName))
                {
                    ToolDb.SaveEntity(info, this.ExistCompareFields);
                }

                // After every 200 processed rows, pause for 500 seconds (presumably to throttle the crawl).
                count++;
                if (count >= 200)
                {
                    Thread.Sleep(1000 * 500);
                    count = 1;
                }
            }
        }

        /// <summary>
        /// Removes "&amp;nbsp;" entities and line breaks from a cell's text and trims the result.
        /// </summary>
        private static string CleanGridCellText(string text)
        {
            return(text.Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim());
        }
예제 #8
0
        /// <summary>
        /// Fetches a match page and, when it shows the full-time result odds, stores one
        /// <c>OddsLiveHistory</c> record through <c>dal.AddHistory</c>.
        /// </summary>
        /// <param name="matchid">Match id written to the history record.</param>
        /// <param name="urlparams">Passed unchanged to <c>GetHtml</c> as the address to fetch.</param>
        /// <returns>
        /// The list created at the start of the method; nothing in this method adds to it.
        /// Exceptions raised while fetching or parsing are swallowed.
        /// </returns>
        public List <OddsLiveMatch> GetMatchScrollOdds(string matchid, string urlparams)
        {
            List <OddsLiveMatch> matches = new List <OddsLiveMatch>();

            try
            {
                HttpHelper http           = new HttpHelper();
                Cookie     languageCookie = new Cookie("lng", "2");
                languageCookie.Domain = domain;
                http.CookieContainer.Add(languageCookie);

                string pageHtml = http.GetHtml(urlparams);
                if (string.IsNullOrEmpty(pageHtml))
                {
                    return(matches);
                }

                // Walk HTML > BODY > FORM > first DIV, which must be the "PageBody" container.
                Parser   parser    = new Parser(new Lexer(pageHtml));
                NodeList htmlNodes = parser.Parse(new TagNameFilter("HTML"));
                NodeList bodyNodes = htmlNodes[0].Children.ExtractAllNodesThatMatch(new TagNameFilter("BODY"))[0].Children;
                NodeList formNodes = bodyNodes.ExtractAllNodesThatMatch(new TagNameFilter("FORM"));
                ITag     container = formNodes[0].Children.ExtractAllNodesThatMatch(new TagNameFilter("DIV"))[0] as ITag;
                if (!container.Attributes["ID"].Equals("PageBody"))
                {
                    return(matches);
                }

                NodeList divs = container.Children.SearchFor(typeof(Winista.Text.HtmlParser.Tags.Div));
                bool isResultPage = divs[0].ToPlainTextString() == "走地盤"
                                    && divs[2].ToPlainTextString() == "全場賽果";
                if (isResultPage)
                {
                    // Divs 3, 5 and 7 hold the home/draw/away odds; the number is the text before the first space.
                    OddsLiveHistory record = new OddsLiveHistory();
                    record.matchid = matchid;
                    record.home    = float.Parse(divs[3].ToPlainTextString().Split(' ')[0]);
                    record.draw    = float.Parse(divs[5].ToPlainTextString().Split(' ')[0]);
                    record.away    = float.Parse(divs[7].ToPlainTextString().Split(' ')[0]);
                    record.time    = DateTime.Now;
                    dal.AddHistory(record);
                }
            }
            catch (Exception)
            {
                // Best effort: failures are ignored.
            }
            return(matches);
        }
예제 #9
0
        /// <summary>
        /// Descends into the current node: saves the current node, enumerator and node list on the
        /// stack and makes the node's recursively collected children the new range.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the range was switched to the children; <c>false</c> when there is no
        /// current node or it yields no children, in which case the state is left unchanged.
        /// </returns>
        public virtual bool NodePushRangeChildren()
        {
            // Nothing to descend into: report "no children" instead of a null dereference.
            if (m_node == null)
            {
                return(false);
            }

            NodeList children = m_node.Children;
            if (children == null)
            {
                return(false);
            }

            children = children.ExtractAllNodesThatMatch(AndFilter.TrueFilter, true);
            if (children.Count <= 0)
            {
                return(false);
            }

            // Save the current position so it can be restored later (push order matters to the matching pop).
            m_nodestack.Push(m_node);
            m_nodestack.Push(m_nodeenum);
            m_nodestack.Push(m_nodelist);

            m_nodelist = children;
            m_nodeenum = m_nodelist.Elements();
            m_node     = null;
            return(true);
        }
예제 #10
0
        /// <summary>
        /// Parses the credit ranking table (id "GridView1") in <paramref name="html"/> and appends one
        /// <c>CorpCreditjd</c> entity per six-column row to <paramref name="list"/>.
        /// </summary>
        /// <param name="list">Receives the generated entities.</param>
        /// <param name="html">HTML of the ranking page.</param>
        /// <param name="crawlAll">When false, processing stops once <paramref name="list"/> holds <c>MaxCount</c> items.</param>
        /// <param name="ddlIndex">Value of the option in select "DropDownList2" whose text is used as the classification.</param>
        public void DealHtml(IList list, string html, bool crawlAll, string ddlIndex)
        {
            // Reporting period: text of the option with value "419425" in select "drpRpt".
            Parser   periodParser = new Parser(new Lexer(html));
            NodeList periodSelect = periodParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "drpRpt")));
            NodeList periodOption = periodSelect.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("option"), new HasAttributeFilter("value", "419425")), true);
            string   dateTime     = periodOption.AsString().Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim();

            periodParser.Reset();

            // Classification: text of the option selected by ddlIndex in select "DropDownList2".
            Parser   classParser    = new Parser(new Lexer(html));
            NodeList classSelect    = classParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "DropDownList2")));
            NodeList classOption    = classSelect.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("option"), new HasAttributeFilter("value", ddlIndex)), true);
            string   classification = classOption.AsString().Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim();

            classParser.Reset();

            Parser   parserDtl = new Parser(new Lexer(html));
            NodeList grids     = parserDtl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));

            if (grids != null && grids.Count == 1 && grids[0] is TableTag)
            {
                TableTag grid = (TableTag)grids[0];
                // Row 0 is skipped (presumably the header row).
                for (int rowIndex = 1; rowIndex < grid.Rows.Length; rowIndex++)
                {
                    if (grid.Rows[rowIndex].Columns.Length != 6)
                    {
                        continue;
                    }

                    string corpName = grid.Rows[rowIndex].Columns[1].ToPlainTextString().Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim();
                    string corpRank = grid.Rows[rowIndex].Columns[2].ToPlainTextString().Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim();

                    int     overallRanking = 0;
                    int     classRanking   = 0;
                    decimal score          = 0;
                    try
                    {
                        overallRanking = int.Parse(grid.Rows[rowIndex].Columns[3].ToPlainTextString().Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim());
                        classRanking   = int.Parse(grid.Rows[rowIndex].Columns[4].ToPlainTextString().Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim());
                        score          = decimal.Parse(grid.Rows[rowIndex].Columns[5].ToPlainTextString().Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "").Trim());
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex + "企业名称:" + corpName);
                        continue;
                    }

                    // The period text starts with a 10-character start date and has a
                    // 10-character end date right after the "----" separator.
                    DateTime periodStart = DateTime.MinValue;
                    DateTime periodEnd   = DateTime.MinValue;
                    try
                    {
                        periodStart = DateTime.Parse(dateTime.Substring(0, 10));
                        periodEnd   = DateTime.Parse(dateTime.Substring(dateTime.IndexOf("----") + 4, 10));
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex + "企业名称:" + corpName);
                        continue;
                    }

                    CorpCreditjd info = ToolDb.GenCorpCreditJD(corpName, string.Empty, corpRank, classification, overallRanking.ToString(), classRanking.ToString(), periodStart.ToString(), periodEnd.ToString(), score.ToString(), "广东省", "深圳市", "深圳市住房和建设局", SiteUrl, "", "", "", "");
                    list.Add(info);

                    bool limitReached = !crawlAll && list.Count >= this.MaxCount;
                    if (limitReached)
                    {
                        return;
                    }
                }
            }
            parserDtl.Reset();
        }
예제 #11
0
 /// <summary>
 /// Returns the nodes in <paramref name="nodelist"/> whose tag name is <paramref name="tag"/>,
 /// searching recursively.
 /// </summary>
 private NodeList GetTagList(NodeList nodelist, string tag)
 {
     TagNameFilter byTagName = new TagNameFilter(tag);
     return(nodelist.ExtractAllNodesThatMatch(byTagName, true));
 }
예제 #12
0
        /// <summary>
        /// Parses the notice listing table in <paramref name="html"/>, fetches each notice's detail
        /// page and appends one <c>NoticeInfo</c> per seven-column row to <paramref name="list"/>;
        /// attachments found on the detail page are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="list">Receives the generated notices.</param>
        /// <param name="html">HTML of the listing page.</param>
        /// <param name="crawlAll">When false, processing stops once <paramref name="list"/> holds <c>MaxCount</c> items.</param>
        public void DealHtml(IList list, string html, bool crawlAll)
        {
            Parser   parserDtl = new Parser(new Lexer(html));
            NodeList aNodes    = parserDtl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_BDGSList2_GridView1")));

            if (aNodes != null && aNodes.Count == 1 && aNodes[0] is TableTag)
            {
                TableTag table = (TableTag)aNodes[0];
                // Row 0 is skipped (presumably the header row).
                for (int i = 1; i < table.Rows.Length; i++)
                {
                    if (table.Rows[i].Columns.Length != 7)
                    {
                        continue;
                    }

                    Type   typs        = typeof(ATag);
                    string prjCode     = table.Rows[i].Columns[1].ToPlainTextString().Trim();
                    string InfoTitle   = table.Rows[i].Columns[2].ToPlainTextString().Trim();
                    string buildUnit   = table.Rows[i].Columns[3].ToPlainTextString().Trim();
                    string prjType     = table.Rows[i].Columns[4].ToPlainTextString().Trim();
                    string InfoType    = "标底公示";
                    string PublistTime = table.Rows[i].Columns[5].ToPlainTextString().Trim();

                    // The title cell carries the link to the detail page. Skip rows without a usable
                    // link instead of failing on a null anchor or a missing quote.
                    NodeList titleChildren = table.Rows[i].Columns[2].Children;
                    NodeList titleLinks    = titleChildren == null ? null : titleChildren.SearchFor(typs, true);
                    ATag     titleLink     = (titleLinks != null && titleLinks.Count > 0) ? titleLinks[0] as ATag : null;
                    if (titleLink == null || titleLink.Link == null)
                    {
                        continue;
                    }

                    // The relative address is everything from the first double quote on,
                    // with quotes and closing parentheses removed.
                    string urlSpilt   = titleLink.Link;
                    int    quoteIndex = urlSpilt.IndexOf('"');
                    if (quoteIndex < 0)
                    {
                        continue;
                    }
                    string url     = urlSpilt.Substring(quoteIndex).Replace("\"", "").Replace(")", "");
                    string InfoUrl = "http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/" + url;

                    string ctxhtml;
                    try
                    {
                        ctxhtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                    }
                    catch (Exception)
                    {
                        // Detail page could not be fetched: skip this notice.
                        continue;
                    }

                    Parser   parserCtx = new Parser(new Lexer(ctxhtml));
                    NodeList ctxNode   = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "text")));

                    // Extract the content span once and render it both as plain text and as HTML.
                    NodeList contentSpan = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_lblContent")), true);
                    string   InfoCtx     = contentSpan.AsString().Replace("&nbsp;", "");
                    string   htmlTxt     = contentSpan.AsHtml();

                    NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "深圳龙岗区工程", string.Empty, string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "深圳市建设工程交易中心龙岗分中心", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, prjType, string.Empty, htmlTxt);

                    list.Add(info);

                    NodeList fileNode = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_AccessoriesControl1_GridView1")), true);
                    if (fileNode != null && fileNode.Count > 0 && fileNode[0] is TableTag)
                    {
                        TableTag fileTable = fileNode[0] as TableTag;
                        // Row 0 of the attachment table is skipped as well.
                        for (int j = 1; j < fileTable.Rows.Length; j++)
                        {
                            NodeList fileLinks = fileTable.Rows[j].Columns[0].SearchFor(typs, true);
                            ATag     fileLink  = (fileLinks != null && fileLinks.Count > 0) ? fileLinks[0] as ATag : null;
                            if (fileLink == null || fileLink.Link == null)
                            {
                                continue; // attachment row without a link
                            }

                            BaseAttach attach = ToolDb.GenBaseAttach(fileTable.Rows[j].Columns[0].ToPlainTextString().Trim(), info.Id, "http://jyzx.cb.gov.cn/LGjyzxWeb/" + fileLink.Link.Replace("../", ""));
                            base.AttachList.Add(attach);
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return;
                    }
                }
            }
            parserDtl.Reset();
        }
예제 #13
0
        /// <summary>
        /// Crawls every result page under <c>SiteUrl</c>, creating one <c>BidInfo</c> per listing row
        /// and collecting "UploadFiles" attachments from each row's detail page into
        /// <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> items were collected.</param>
        /// <returns>The collected <c>BidInfo</c> items; empty when the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();

            // Determine the number of pages.
            int    pageInt = 1;
            string html    = string.Empty;
            string HtmlTxt = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser = new Parser(new Lexer(html));
            NodeList sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new TagNameFilter("form")), new TagNameFilter("a")));

            if (sNode != null && sNode.Count > 0)
            {
                for (int i = 0; i < sNode.Count; i++)
                {
                    ATag pageA = sNode[i] as ATag;
                    if (pageA == null || !pageA.ToPlainTextString().Contains("尾页"))
                    {
                        continue;
                    }

                    // The "last page" link ends with "=<page count>"; keep the current value when
                    // the suffix is missing or not a number.
                    string lastPageLink = pageA.Link;
                    int    parsedPages;
                    if (lastPageLink != null && int.TryParse(lastPageLink.Remove(0, lastPageLink.LastIndexOf('=') + 1), out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
            }

            parser.Reset();

            for (int i = 1; i <= pageInt; i++)
            {
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&Page=" + i.ToString()), Encoding.Default);
                }
                catch (Exception)
                {
                    // Page could not be fetched: move on to the next one.
                    continue;
                }

                parser = new Parser(new Lexer(html));
                sNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("bordercolor", "#CCCCCC")));

                // The listing is the SECOND matching table, so at least two matches are required.
                if (sNode == null || sNode.Count < 2)
                {
                    continue;
                }
                TableTag table = sNode[1] as TableTag;
                if (table == null)
                {
                    continue;
                }

                HtmlTxt = sNode.AsHtml();
                string code = string.Empty;

                // Row 0 is skipped (presumably the header row).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.Columns.Length < 4)
                    {
                        continue; // columns 0..3 are read below
                    }

                    // Tender type.
                    string bidType = tr.Columns[0].ToPlainTextString();

                    // NOTE(review): substring test -- an empty or partial type text also counts as
                    // a construction project; confirm whether an exact match was intended.
                    string invType  = "施工,设计,勘察,服务,劳务分包,专业分包,小型施工,监理,设备材料,其他";
                    string specType = invType.Contains(bidType) ? "建设工程" : "其他";

                    // Project name.
                    string prjName = tr.Columns[1].ToPlainTextString().Replace("&nbsp;", "");

                    // Winning bidder.
                    string bidUnit = tr.Columns[2].ToPlainTextString().Replace("&nbsp;", "");

                    // Publication date, with the surrounding brackets removed.
                    string bidDate = tr.Columns[3].ToPlainTextString().TrimStart('[').TrimEnd(']');

                    // Find the link inside the project-name cell; skip rows without one.
                    NodeList cNode = new NodeList();
                    tr.Columns[1].CollectInto(cNode, new TagNameFilter("a"));
                    ATag detailLink = cNode.Count > 0 ? cNode[0] as ATag : null;
                    if (detailLink == null)
                    {
                        continue;
                    }

                    string InfoUrl = "http://www.chjssz.gov.cn/" + detailLink.Link;
                    prjName = ToolDb.GetPrjName(prjName);
                    bidType = ToolHtml.GetInviteTypes(bidType);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "广州市区", "从化市", string.Empty, code, prjName, string.Empty, bidDate, bidUnit, bidDate, string.Empty, "见附件", string.Empty, "广州建设工程交易中心", bidType, specType, string.Empty, string.Empty, InfoUrl, string.Empty, HtmlTxt);
                    list.Add(info);

                    // Fetch the detail page for attachments. A failed fetch only skips the
                    // attachments; the MaxCount check below still runs for the item just added.
                    string dlHtml  = string.Empty;
                    bool   fetched = true;
                    try
                    {
                        dlHtml = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default);
                    }
                    catch (Exception)
                    {
                        fetched = false;
                    }

                    if (fetched)
                    {
                        Parser   dlParser = new Parser(new Lexer(dlHtml));
                        NodeList dlNodes  = dlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("background", "pic/abouts_16.jpg")));
                        if (dlNodes != null && dlNodes.Count > 0)
                        {
                            NodeList ddNode = dlNodes.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("A"), new HasAttributeFilter("target", "_blank")), true);
                            if (ddNode != null && ddNode.Count > 0)
                            {
                                for (int k = 0; k < ddNode.Count; k++)
                                {
                                    ATag ddATag = ddNode[k] as ATag;
                                    if (ddATag != null && ddATag.Link != null && ddATag.Link.Contains("UploadFiles"))
                                    {
                                        BaseAttach attach = ToolDb.GenBaseAttach(ddATag.StringText, info.Id, "http://www.chjssz.gov.cn/" + ddATag.Link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                                dlParser.Reset();
                            }
                        }
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }
예제 #14
0
        /// <summary>
        /// Downloads the start page for <paramref name="path"/>, hands it to
        /// <c>GetCorpStaffSzjsjMethod</c>, and then walks the remaining pages by
        /// re-posting the ASP.NET form with the view state of the most recently
        /// downloaded page.
        /// </summary>
        /// <param name="path">Value forwarded to <c>GetStartUrl</c> and <c>GetCorpStaffSzjsjMethod</c>.</param>
        /// <param name="crawlAll">When false, crawling stops as soon as <paramref name="list"/> holds <c>MaxCount</c> items.</param>
        /// <param name="list">Accumulator that is passed to <c>GetCorpStaffSzjsjMethod</c> for every page.</param>
        /// <returns>
        /// The same <paramref name="list"/> instance on every exit path. (The previous
        /// version returned null after successfully walking all pages, which was
        /// inconsistent with the other exits.)
        /// </returns>
        private IList GetList(int path, bool crawlAll, IList list)
        {
            string html = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(GetStartUrl(path), Encoding.Default);
            }
            catch (Exception ex)
            {
                // Without the first page there is nothing to paginate from.
                Logger.Error(ex);
                return list;
            }

            // First page.
            GetCorpStaffSzjsjMethod(path, list, html, crawlAll);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }

            // Read the total page count from the pager span inside table "Table1".
            // If it cannot be read the count stays at 1 and no further page is requested.
            int      pageInt = 1;
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table1")));

            if (tdNodes != null)
            {
                try
                {
                    string pageTemp = tdNodes.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ucPageNumControl_lbltotal")), true).AsString().Replace("&nbsp;", "").Trim();
                    pageInt = int.Parse(pageTemp);
                }
                catch (Exception ex)
                {
                    Logger.Error(ex);
                }
            }

            for (int i = 2; i <= pageInt; i++)
            {
                string cookiestr = string.Empty;

                // Each request presses the pager's "next page" button ("下一页") using the
                // view state of the page currently held in html.
                string viewState = ToolWeb.GetAspNetViewState(html);
                NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "CORP_NAME", "NAME", "ucPageNumControl:gotopage", "ucPageNumControl:NEXTpage" }, new string[] { string.Empty, string.Empty, viewState, string.Empty, string.Empty, string.Empty, "下一页" });

                try
                {
                    html = ToolWeb.GetHtmlByUrl(GetStartUrl(path), nvc, Encoding.Default, ref cookiestr);
                    // Process the follow-up page.
                    GetCorpStaffSzjsjMethod(path, list, html, crawlAll);
                }
                catch (Exception ex)
                {
                    // If the download itself failed, html still holds the previous page,
                    // so the next iteration re-posts from that same view state.
                    Logger.Error(ex);
                    continue;
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }

            return list;
        }
예제 #15
0
        /// <summary>
        /// Queries the JDScore page once per rank ("A" .. "C-") for the category selected
        /// by <paramref name="l"/>, walks every result page, and stores one
        /// <c>CorpCreditjd</c> entity per data row of table "GridView1" through
        /// <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="l">
        /// Category index posted as the "DropDownList2" value; values above 13 are
        /// posted as "999999999".
        /// </param>
        /// <param name="bidhtml">Passed through unchanged to <c>ToolDb.GenCorpCreditJD</c>.</param>
        /// <param name="list">Not used by this method.</param>
        /// <param name="crawlAll">Not used by this method.</param>
        /// <remarks>
        /// A failed POST is now logged and that request is skipped. Previously the
        /// exception was swallowed and the HTML of the previous request was parsed
        /// again, which re-saved the old rows labelled with the current rank.
        /// Rows with too few cells are skipped instead of raising an index error.
        /// </remarks>
        private void Save(int l, string bidhtml, IList list, bool crawlAll)
        {
            string Url         = "http://61.144.226.2:8008/JDScore.aspx?clearPaging=true&guid=450845";
            string htl         = string.Empty;
            string cookiedtstr = string.Empty;

            try
            {
                htl = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8, ref cookiedtstr);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
            }

            string[] classLen = new string[] { "A", "A-", "B", "B-", "C", "C-" };
            string   ddlindex = l > 13 ? "999999999" : l.ToString();

            // Both patterns are loop-invariant, so build them once.
            Regex regInt       = new Regex(@"\d{1,}[\.]?\d{0,}");
            Regex regPageCount = new Regex(@"共\d+页");

            for (int n = 0; n < classLen.Length; n++)
            {
                int    pageInt   = 1;
                string strcookie = string.Empty;

                NameValueCollection nvc3 = BuildJdScoreForm(htl, classLen[n], ddlindex, "1");
                try
                {
                    htl = ToolWeb.GetHtmlByUrl(Url, nvc3, Encoding.UTF8, ref strcookie);
                }
                catch (Exception ex)
                {
                    // htl still holds the previous response; parsing it again would
                    // store its rows under the wrong rank, so skip this rank.
                    Logger.Error(ex.ToString());
                    continue;
                }

                // The pager label contains the page total in the form "共N页".
                Parser   parser = new Parser(new Lexer(htl));
                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "GridViewPaging1_lblGridViewPagingDesc")));

                if (dtList != null && dtList.Count > 0)
                {
                    string pagerText = dtList.AsString() ?? string.Empty;
                    int    parsedPages;
                    if (int.TryParse(regPageCount.Match(pagerText).Value.Trim(new char[] { '共', '页' }), out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }

                for (int k = 1; k <= pageInt; k++)
                {
                    if (k > 1)
                    {
                        NameValueCollection nvc4 = BuildJdScoreForm(htl, classLen[n], ddlindex, k.ToString());
                        try
                        {
                            htl = ToolWeb.GetHtmlByUrl(Url, nvc4, Encoding.UTF8, ref strcookie);
                        }
                        catch (Exception ex)
                        {
                            // Same reasoning as above: do not re-save the previous page.
                            Logger.Error(ex.ToString());
                            continue;
                        }
                    }

                    string beg = string.Empty, end = string.Empty, avg = string.Empty, type = string.Empty;

                    // Display text of the selected rank option.
                    Parser   parserCtx = new Parser(new Lexer(htl));
                    NodeList ctxNode   = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "DropDownList1")));
                    string   classlv   = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("option"), new HasAttributeFilter("value", classLen[n])), true).AsString().Replace("&nbsp;", "");

                    // Display text of the selected category option.
                    Parser   parserCtx2 = new Parser(new Lexer(htl));
                    NodeList ctxNode2   = parserCtx2.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "DropDownList2")));
                    string   thtype     = ctxNode2.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("option"), new HasAttributeFilter("value", ddlindex)), true).AsString().Replace("&nbsp;", "");

                    // Table "GridView2": type, begin date, end date and a numeric value.
                    // Every row overwrites the previous values, so the last usable row wins.
                    // NOTE(review): confirm that "last row wins" is intended.
                    Parser   dtparser = new Parser(new Lexer(htl));
                    NodeList delList  = dtparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView2")));
                    if (delList != null && delList.Count > 0)
                    {
                        TableTag tab = delList[0] as TableTag;
                        for (int e = 1; e < tab.RowCount; e++)
                        {
                            Winista.Text.HtmlParser.Tags.TableRow trdate = tab.Rows[e];
                            if (trdate.ColumnCount < 4)
                            {
                                continue;
                            }
                            type = trdate.Columns[0].ToPlainTextString().Trim();
                            beg  = trdate.Columns[1].ToPlainTextString().Trim();
                            end  = trdate.Columns[2].ToPlainTextString().Trim();
                            avg  = regInt.Match(trdate.Columns[3].ToPlainTextString()).Value;
                        }
                    }

                    // Table "GridView1": one company per row (row 0 is skipped).
                    Parser   par     = new Parser(new Lexer(htl));
                    NodeList conList = par.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                    if (conList == null || conList.Count == 0)
                    {
                        continue;
                    }

                    TableTag tabContent = conList[0] as TableTag;
                    for (int f = 1; f < tabContent.RowCount; f++)
                    {
                        Winista.Text.HtmlParser.Tags.TableRow dr = tabContent.Rows[f];
                        if (dr.ColumnCount < 7)
                        {
                            // Not enough cells for either layout.
                            continue;
                        }

                        // Rows with more than 7 cells carry one extra cell after the name,
                        // which shifts every later field one column to the right.
                        int shift = dr.ColumnCount > 7 ? 1 : 0;

                        string corpName     = dr.Columns[1].ToPlainTextString().Trim();
                        string good         = regInt.Match(dr.Columns[2 + shift].ToPlainTextString().Trim()).Value;
                        string bad          = regInt.Match(dr.Columns[3 + shift].ToPlainTextString().Trim()).Value;
                        string ranking      = dr.Columns[4 + shift].ToPlainTextString().Trim();
                        string categoryRank = dr.Columns[5 + shift].ToPlainTextString().Trim();
                        string realScore    = regInt.Match(dr.Columns[6 + shift].ToPlainTextString().Trim()).Value;

                        CorpCreditjd info = ToolDb.GenCorpCreditJD(corpName, type, classlv, thtype, ranking, categoryRank, beg, end, realScore, "广东省", "深圳市", "深圳市住房和建设局", Url, bidhtml, avg, good, bad);
                        ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate);
                        count++;
                        sqlcount++;

                        // Pause for two minutes after every 201 saved rows
                        // (presumably to throttle load on the site or database; confirm).
                        if (count > 200)
                        {
                            count = 0;
                            Thread.Sleep(120000);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Builds the POST form for the JDScore page: the view state and event
        /// validation tokens are read from <paramref name="html"/>, and the pager's
        /// "Go" button is pressed for <paramref name="page"/>.
        /// </summary>
        private NameValueCollection BuildJdScoreForm(string html, string rank, string category, string page)
        {
            string viewState       = ToolWeb.GetAspNetViewState(html);
            string eventValidation = ToolWeb.GetAspNetEventValidation(html);

            return ToolWeb.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "txtCorpName", "DropDownList1", "DropDownList2", "hiddenIsFirst", "GridViewPaging1$txtGridViewPagingForwardTo", "GridViewPaging1$btnForwardToPage", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION" },
                new string[] { "", "", "", viewState, "", rank, category, "false", page, "Go", "", eventValidation });
        }
예제 #16
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new ArrayList();
            //取得页码
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
            }
            catch (Exception ex)
            {
                return(list);
            }
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "5")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                NodeList aNodes = new NodeList();
                tdNodes[0].CollectInto(aNodes, new TagNameFilter("a"));
                if (aNodes != null && aNodes.Count > 0)
                {
                    for (int i = 0; i < aNodes.Count; i++)
                    {
                        ATag aTag = aNodes[i] as ATag;
                        if (aTag.ToPlainTextString().Contains("尾页"))
                        {
                            Regex re = new Regex(@"[^0-9]+");
                            pageInt = int.Parse(re.Replace(aTag.Link, ""));
                            break;
                        }
                    }
                }
            }
            parser.Reset();
            for (int i = 1; i <= pageInt; i++)
            {
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode("http://218.20.201.20/www/zbmsg/2008/xzb_list.asp?page=" + i.ToString() + "&id=13828"), Encoding.Default);
                }
                catch (Exception ex)
                {
                    continue;
                }

                parser  = new Parser(new Lexer(html));
                tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_list")), true)));
                if (tdNodes != null && tdNodes.Count > 0)
                {
                    for (int j = 0; j < tdNodes.Count; j++)
                    {
                        string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, buildScale = string.Empty, buildCycle = string.Empty,
                               levels = string.Empty, structType = string.Empty, bidMoney = string.Empty, buildType = string.Empty, buildQual = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, bidType = string.Empty, HtmlTxt = string.Empty;
                        decimal       decMoney = 0;
                        StringBuilder ctx      = new StringBuilder();
                        ATag          aTag     = tdNodes[j] as ATag;
                        if (aTag.Link.Contains("xzb_show.asp"))
                        {
                            InfoUrl = "http://218.20.201.20/www/zbmsg/2008/" + aTag.Link.Remove(aTag.Link.IndexOf("&"));
                            Regex  regexHtml = new Regex(@"<div[^>]*>[\s]*</div>");
                            string dlHtml    = string.Empty;
                            try
                            {
                                dlHtml = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).ToLower().Replace("&nbsp;", "");
                            }
                            catch (Exception ex)
                            {
                                continue;
                            }
                            string filterHtml = dlHtml.Replace("\n", "").Replace("\r", "").Replace("<u>", "<a>").Replace("</u>", "</a>");
                            prjName = aTag.ToPlainTextString();

                            //内容
                            Parser   ctxParser = new Parser(new Lexer(dlHtml));
                            NodeList ctxNodes  = ctxParser.ExtractAllNodesThatMatch(new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), false));

                            ctx.Append(ctxNodes.AsString().Replace("&nbsp;", ""));
                            HtmlTxt = ctxNodes.AsHtml();
                            Parser   dlParser = new Parser(new Lexer(regexHtml.Replace(filterHtml, "")));
                            NodeList dlNodes  = dlParser.ExtractAllNodesThatMatch(new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), false));


                            //搜索附件
                            NodeList findFiles = dlNodes.ExtractAllNodesThatMatch(new TagNameFilter("a"), true);
                            NodeList fileNode  = new NodeList();
                            if (findFiles != null && findFiles.Count > 0)
                            {
                                for (int f = 0; f < findFiles.Count; f++)
                                {
                                    ATag fileA = findFiles[f] as ATag;
                                    if (fileA.Link.Contains("uploadfile"))
                                    {
                                        fileNode.Add(fileA);
                                    }
                                }
                            }
                            INode nods = dlNodes[0].Parent.Parent.Parent.Parent;
                            //发布日期
                            if (nods != null)
                            {
                                TableTag tb = nods as TableTag;
                                for (int t = 0; t < tb.RowCount; t++)
                                {
                                    TableRow tr = tb.Rows[t];
                                    if (tr.ToPlainTextString().Contains("发布日期"))
                                    {
                                        beginDate = tr.ToPlainTextString().Substring(tr.ToPlainTextString().IndexOf("[") + 1, tr.ToPlainTextString().IndexOf("]") - tr.ToPlainTextString().IndexOf("[") - 1);
                                        break;
                                    }
                                }
                            }
                            for (int k = 0; k < dlNodes.Count; k++)
                            {
                                if (dlNodes[k] is ITag)
                                {
                                    //对a标签进行过滤
                                    Regex strReplace = new Regex(@"<a[^>]*>|</a>");
                                    if (dlNodes[k].ToPlainTextString().Contains("中标候选人为:") || dlNodes[k].ToPlainTextString().Contains("中标人为:"))
                                    {
                                        NodeList bidUnitNode = new NodeList();
                                        dlNodes[k].CollectInto(bidUnitNode, new TagNameFilter("a"));
                                        if (bidUnitNode.Count > 0)
                                        {
                                            //找出匹配的项
                                            Regex           regexbidUnit = new Regex(@"<a[^>]*>[^<]*</a>");
                                            MatchCollection matchbidUnit = null;
                                            if (dlNodes[k].ToPlainTextString().Contains("中标候选人为:"))
                                            {
                                                matchbidUnit = regexbidUnit.Matches(dlNodes[k].ToHtml().Substring(dlNodes[k].ToHtml().IndexOf("中标候选人为:")));
                                            }
                                            else if (dlNodes[k].ToPlainTextString().Contains("中标人为:"))
                                            {
                                                matchbidUnit = regexbidUnit.Matches(dlNodes[k].ToHtml().Substring(dlNodes[k].ToHtml().IndexOf("中标人为:")));
                                            }
                                            if (matchbidUnit != null && matchbidUnit.Count > 0)
                                            {
                                                bidUnit = strReplace.Replace(matchbidUnit[0].ToString(), "");
                                            }
                                            if (string.IsNullOrEmpty(bidUnit))
                                            {
                                                bidUnit = dlNodes[k + 1].ToPlainTextString().Trim();
                                            }
                                        }
                                        else
                                        {
                                            bidUnit = dlNodes[k + 1].ToPlainTextString();
                                        }
                                    }
                                    if (dlNodes[k].ToPlainTextString().Contains("中标价:") || dlNodes[k].ToPlainTextString().Contains("投标报价:") || dlNodes[k].ToPlainTextString().Contains("中标价:") || dlNodes[k].ToPlainTextString().Contains("中标价为"))
                                    {
                                        Regex    regdecimal = new Regex(@"\d{1,}[\.]?\d{0,}");
                                        NodeList moneyNode  = new NodeList();
                                        dlNodes[k].CollectInto(moneyNode, new TagNameFilter("a"));
                                        if (moneyNode.Count > 0)
                                        {
                                            Regex           regexmoney = new Regex(@"<a[^>]*>[^<]*</a>");
                                            MatchCollection matchmoney = null;
                                            if (dlNodes[k].ToPlainTextString().Contains("中标价:"))
                                            {
                                                matchmoney = regexmoney.Matches(dlNodes[k].ToHtml().Substring(dlNodes[k].ToHtml().IndexOf("中标价:")));
                                            }
                                            if (dlNodes[k].ToPlainTextString().Contains("投标报价:"))
                                            {
                                                matchmoney = regexmoney.Matches(dlNodes[k].ToHtml().Substring(dlNodes[k].ToHtml().IndexOf("投标报价:")));
                                            }
                                            if (matchmoney != null && matchmoney.Count > 0)
                                            {
                                                if (dlNodes[k].ToPlainTextString().Contains("万元"))
                                                {
                                                    try
                                                    {
                                                        decMoney = decimal.Parse(regdecimal.Matches(dlNodes[k].ToPlainTextString())[0].ToString());
                                                    }
                                                    catch (Exception ex)
                                                    {
                                                    }
                                                }
                                                else
                                                {
                                                    try
                                                    {
                                                        decMoney = decimal.Parse(regdecimal.Matches(dlNodes[k].ToPlainTextString())[0].ToString()) / 10000;
                                                    }
                                                    catch (Exception ex)
                                                    {
                                                    }
                                                }
                                            }
                                        }
                                        else
                                        {
                                            if (dlNodes[k].ToPlainTextString().Contains("万元"))
                                            {
                                                decMoney = decimal.Parse(regdecimal.Matches(dlNodes[k].ToPlainTextString().ToString())[0].ToString());
                                            }
                                            else
                                            {
                                                decMoney = decimal.Parse(regdecimal.Matches(dlNodes[k].ToPlainTextString().ToString())[0].ToString()) / 10000;
                                            }
                                        }
                                    }
                                }
                            }
                            string regexstr = @"<[^>]*>";
                            string ctxStr   = Regex.Replace(ctx.ToString(), regexstr, string.Empty, RegexOptions.IgnoreCase);
                            bidUnit = bidUnit.Replace(" ", "").Trim();
                            Regex reg = new Regex(@"[\u4e00-\u9fa5]");
                            if (!reg.IsMatch(bidUnit))
                            {
                                bidUnit = "";
                            }
                            else
                            {
                                Regex  regBidMoneys = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                                string t            = regBidMoneys.Match(bidUnit).Value;
                                if (!string.IsNullOrEmpty(t))
                                {
                                    bidUnit = "";
                                }
                            }
                            if (string.IsNullOrEmpty(bidUnit) || decMoney <= 0)
                            {
                                string txt = string.Empty;
                                parser = new Parser(new Lexer(dlHtml));
                                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), true)));
                                if (dtList != null && dtList.Count > 1)
                                {
                                    for (int k = 0; k < dtList.Count; k++)
                                    {
                                        if (dtList[k].ToPlainTextString().Trim().Contains("中标候选人") || dtList[k].ToPlainTextString().Trim().Contains("中标人"))
                                        {
                                            try
                                            {
                                                if (string.IsNullOrEmpty(dtList[k + 1].ToPlainTextString().Trim()))
                                                {
                                                    txt += dtList[k].ToPlainTextString().Trim();
                                                    string text = txt.Remove(txt.Length - txt.IndexOf("为:") - 2);
                                                    if (string.IsNullOrEmpty(text))
                                                    {
                                                        txt += dtList[k].ToPlainTextString().Trim();
                                                        txt += dtList[k + 2].ToPlainTextString().Trim() + "\r\n";
                                                    }
                                                    else
                                                    {
                                                        txt += dtList[k].ToPlainTextString().Trim() + "\r\n";
                                                    }
                                                }
                                                else
                                                {
                                                    txt += dtList[k].ToPlainTextString().Trim();
                                                    string text = txt.Remove(txt.Length - txt.IndexOf("为:") - 2);
                                                    if (string.IsNullOrEmpty(text))
                                                    {
                                                        txt += dtList[k].ToPlainTextString().Trim();
                                                        txt += dtList[k + 1].ToPlainTextString().Trim() + "\r\n";
                                                    }
                                                    else
                                                    {
                                                        txt += dtList[k].ToPlainTextString().Trim() + "\r\n";
                                                    }
                                                }
                                            }
                                            catch { }
                                        }
                                        else
                                        {
                                            txt += dtList[k].ToPlainTextString().Trim() + "\r\n";
                                        }
                                    }
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        Regex regBidUnit = new Regex(@"(中标单位|中标候选单位|中标候选人为|中标人为):[^\r\n]+\r\n");
                                        bidUnit = regBidUnit.Match(txt.Replace("\r\n\r\n", "")).Value.Replace("中标候选人为", "").Replace("中标人为", "").Replace("中标单位:", "").Replace("中标候选单位:", "").Replace(":", "").Trim();
                                    }
                                    if (decMoney <= 0)
                                    {
                                        Regex  regBidMoneystr = new Regex(@"(金额|价格|报价|中标价)(:|:)[^\r\n]+\r\n");
                                        string monerystr      = regBidMoneystr.Match(txt).Value.Replace("中标价", "").Replace("金额", "").Replace("价格", "").Replace("报价", "").Replace(":", "").Replace(":", "").Replace(",", "").Replace(",", "").Trim();
                                        Regex  regBidMoney    = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                                        if (!string.IsNullOrEmpty(regBidMoney.Match(monerystr).Value))
                                        {
                                            if (monerystr.Contains("万元") || monerystr.Contains("万美元"))
                                            {
                                                decMoney = decimal.Parse(regBidMoney.Match(monerystr).Value);
                                            }
                                            else
                                            {
                                                try
                                                {
                                                    decMoney = decimal.Parse(regBidMoney.Match(monerystr).Value) / 10000;
                                                    if (decMoney < decimal.Parse("0.1"))
                                                    {
                                                        decMoney = 0;
                                                    }
                                                }
                                                catch (Exception)
                                                {
                                                    decMoney = 0;
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            if (string.IsNullOrEmpty(bidUnit) || decMoney <= 0)
                            {
                                string txt = string.Empty;
                                parser = new Parser(new Lexer(dlHtml));
                                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("p"), new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), true)));
                                if (dtList != null && dtList.Count > 1)
                                {
                                    for (int k = 0; k < dtList.Count; k++)
                                    {
                                        if (dtList[k].ToPlainTextString().Trim().Contains("中标候选人") || dtList[k].ToPlainTextString().Trim().Contains("中标人"))
                                        {
                                            if (string.IsNullOrEmpty(dtList[k + 1].ToPlainTextString().Trim()))
                                            {
                                                k++;
                                                txt += dtList[k].ToPlainTextString().Trim();
                                            }
                                            else
                                            {
                                                txt += dtList[k].ToPlainTextString().Trim();
                                                string text = txt.Remove(txt.Length - txt.IndexOf("为:") - 2);
                                                if (string.IsNullOrEmpty(text))
                                                {
                                                    txt  = "";
                                                    txt += dtList[k].ToPlainTextString().Trim();
                                                }
                                                else
                                                {
                                                    txt  = "";
                                                    txt += dtList[k].ToPlainTextString().Trim() + "\r\n";
                                                }
                                            }
                                        }
                                        else
                                        {
                                            txt += dtList[k].ToPlainTextString().Trim() + "\r\n";
                                        }
                                        Regex regexsHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                                        txt = regexsHtml.Replace(txt, "");
                                    }
                                    if (string.IsNullOrEmpty(bidUnit))
                                    {
                                        Regex regBidUnit = new Regex(@"(中标单位|中标候选单位|中标候选人为|中标人为):[^\r\n]+\r\n");
                                        bidUnit = regBidUnit.Match(txt.Replace("\r\n\r\n", "")).Value.Replace("中标候选人为", "").Replace("中标人为", "").Replace("中标单位:", "").Replace("中标候选单位:", "").Replace(":", "").Trim();
                                    }
                                    if (string.IsNullOrEmpty(bidMoney))
                                    {
                                        Regex  regBidMoneystr = new Regex(@"(金额|价格|报价|中标价|中标价为)(:|:)[^\r\n]+\r\n");
                                        string monerystr      = regBidMoneystr.Match(txt).Value.Replace("中标价为", "").Replace("中标价", "").Replace("金额", "").Replace("价格", "").Replace("报价", "").Replace(":", "").Replace(":", "").Replace(",", "").Replace(",", "").Trim();
                                        Regex  regBidMoney    = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                                        if (!string.IsNullOrEmpty(regBidMoney.Match(monerystr).Value))
                                        {
                                            if (monerystr.Contains("万元") || monerystr.Contains("万美元"))
                                            {
                                                decMoney = decimal.Parse(regBidMoney.Match(monerystr).Value);
                                            }
                                            else
                                            {
                                                try
                                                {
                                                    decMoney = decimal.Parse(regBidMoney.Match(monerystr).Value) / 10000;
                                                    if (decMoney < decimal.Parse("0.1"))
                                                    {
                                                        decMoney = 0;
                                                    }
                                                }
                                                catch (Exception)
                                                {
                                                    decMoney = 0;
                                                }
                                            }
                                        }
                                    }
                                }
                            }

                            prjName = ToolDb.GetPrjName(prjName.Replace(" ", ""));
                            bidType = ToolHtml.GetInviteTypes(prjName);
                            BidInfo info = ToolDb.GenBidInfo("广东省", "广州市区", "番禺区", string.Empty, string.Empty, prjName, buildUnit, beginDate, bidUnit, beginDate, string.Empty, ctxStr, string.Empty, "广州市番禺区建设局", bidType, "建设工程", string.Empty, decMoney.ToString(), InfoUrl, string.Empty, HtmlTxt);

                            list.Add(info);
                            if (fileNode.Count > 0)
                            {
                                try
                                {
                                    for (int f = 0; f < fileNode.Count; f++)
                                    {
                                        BaseAttach attach = ToolDb.GenBaseAttach((fileNode[0] as ATag).StringText, info.Id, "http://218.20.201.20" + (fileNode[0] as ATag).Link);
                                        base.AttachList.Add(attach);
                                    }
                                }
                                catch { }
                            }
                            dlParser.Reset();
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }

            return(list);
        }