Ejemplo n.º 1
0
        /// <summary>
        /// Parses one staff listing page, downloads the detail page of every listed
        /// person and saves a CorpStaff entity for each of them via ToolDb.SaveEntity.
        /// </summary>
        /// <param name="url">Not used by this method.</param>
        /// <param name="list">Not used by this method; entities are saved directly instead of being collected.</param>
        /// <param name="html">HTML of the listing page; rows are read from the table whose id is "dgConstBid".</param>
        /// <param name="crawlAll">Not used by this method.</param>
        private void GetCorpStaffSzjsjMethod(string url, IList list, string html, bool crawlAll)
        {
            Parser   parser = new Parser(new Lexer(html));
            NodeList aNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid")));

            // The page is processed only when exactly one matching table is found.
            if (aNodes == null || aNodes.Count != 1 || !(aNodes[0] is TableTag))
            {
                return;
            }

            TableTag table = (TableTag)aNodes[0];

            // The loop starts at index 1, so the first row is never processed.
            for (int i = 1; i < table.Rows.Length; i++)
            {
                var row = table.Rows[i];
                if (row.Columns.Length != 6)
                {
                    continue;
                }

                // These values are never filled in by this crawler and are passed on empty.
                string sex = string.Empty, credType = string.Empty, certGrade = string.Empty, regLevel = string.Empty, regCode = string.Empty, authorUnit = string.Empty;

                string personName = row.Columns[1].ToPlainTextString().Trim().Replace("&nbsp;", "");

                // The person's id is embedded in the link of the name cell. Skip the row
                // instead of dereferencing null when that cell carries no anchor.
                NodeList cellChildren = row.Columns[1].Children;
                NodeList anchors      = cellChildren == null ? null : cellChildren.SearchFor(typeof(ATag), true);
                if (anchors == null || anchors.Count == 0 || !(anchors[0] is ATag))
                {
                    Logger.Error("GetCorpStaffSzjsjMethod: row " + i + " has no detail link, skipped");
                    continue;
                }

                string link  = ((ATag)anchors[0]).Link;
                string rawId = link.Replace("GoDetail('", "").Replace("');", "");

                // Cleaned id; only used for logging. The detail URL is built from the raw id.
                string idNumber = rawId.Replace("&am", "").Replace("&a", "").Replace("p;c", "").Replace("cate", "").Replace("cat", "").Replace("ate", "");

                string corpName   = row.Columns[2].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string corpCode   = corpName; // fallback when the detail page yields no company code
                string personType = row.Columns[3].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string certCode   = row.Columns[4].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string profession = row.Columns[5].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string detailUrl  = "http://61.144.226.2/ryxx/Detail_LWDZ.aspx?ID_NUMBER=" + rawId;

                string detailHtml;
                try
                {
                    detailHtml = ToolWeb.GetHtmlByUrl(detailUrl, Encoding.Default);
                }
                catch (Exception ex)
                {
                    // The first field is labelled "person name", so it must carry the
                    // person's name rather than the company name.
                    Logger.Error("人员姓名:" + personName + ",证件号:" + idNumber + "所在单位:" + corpName + "," + detailUrl + ";" + ex);
                    continue;
                }

                Parser   detailParser = new Parser(new Lexer(detailHtml));
                NodeList detailTables = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("borderColor", "#cccccc")));
                if (detailTables != null && detailTables.Count > 0)
                {
                    string text = detailTables.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("TD"), new HasAttributeFilter("width", "76%")), true).AsString().Replace("&nbsp;", "");

                    // When several lines match, the last one wins.
                    foreach (Match m in Regex.Matches(text, "任职企业编号:.*?\r\n"))
                    {
                        corpCode = m.ToString().Replace("任职企业编号:", "").Replace("\r\n", "");
                    }
                }

                // The id-number argument is passed empty, not the cleaned id.
                CorpStaff corpStaff = ToolDb.GenCorpStaff(personName, sex, credType, string.Empty, corpName, corpCode, certCode, regLevel, regCode, authorUnit, personType, certGrade, "广东省", "深圳市区", "深圳市住房和建设局", detailUrl, profession, "", "", "", "");
                ToolDb.SaveEntity(corpStaff, this.ExistCompareFields);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Crawls the paged staff listing at <c>SiteUrl</c>, downloads each person's
        /// detail page and saves one CorpStaff entity per row via ToolDb.SaveEntity.
        /// </summary>
        /// <param name="crawlAll">Not used by this method.</param>
        /// <returns>
        /// Always null, both when the first page cannot be downloaded and after a
        /// completed crawl; results are persisted rather than returned.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const int pageSize        = 15;            // also posted as "$pgsz"
            const int maxAttempts     = 3;             // one try plus two retries
            const int retryDelayMs    = 60 * 1000 * 6; // six minutes between retries
            const int recordsPerPause = 28;            // pause after this many saved records

            string html;
            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(null);
            }

            int      totalPage = 0; // value of "total" on the first page; posted back as "$total"
            int      pageInt   = 1;
            Parser   parser    = new Parser(new Lexer(html));
            NodeList pageNode  = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "clearfix")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // NOTE(review): this Replace is a no-op as written; it presumably
                    // normalised a full-width comma originally - confirm.
                    string temp = pageNode.AsString().Replace(",", ",");
                    string page = temp.GetRegexBegEnd("total", ",").GetReplace("\":");
                    totalPage = int.Parse(page);

                    // Round up. "total / 15 + 1" asked for one page too many whenever
                    // the total was an exact multiple of the page size.
                    pageInt = (totalPage + pageSize - 1) / pageSize;
                    if (pageInt < 1)
                    {
                        pageInt = 1;
                    }
                }
                catch
                {
                    // Best effort: when the total cannot be read only the first page is crawled.
                }
            }

            int count = 0;
            for (int p = 1; p <= pageInt; p++)
            {
                if (p > 1)
                {
                    Logger.Error(p);
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                        new string[] { "$total", "$reload", "$pg", "$pgsz" },
                        new string[] { totalPage.ToString(), "0", p.ToString(), pageSize.ToString() });

                    bool pageLoaded = false;
                    for (int attempt = 1; attempt <= maxAttempts && !pageLoaded; attempt++)
                    {
                        try
                        {
                            if (attempt > 1)
                            {
                                Thread.Sleep(retryDelayMs);
                            }
                            html       = ToolWeb.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                            pageLoaded = true;
                        }
                        catch
                        {
                            // Try again until maxAttempts is reached.
                        }
                    }
                    if (!pageLoaded)
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_box responsive personal")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row are not processed (presumably header and
                // pager rows - confirm against the page markup).
                for (int i = 1; i < table.RowCount - 1; i++)
                {
                    TableRow tr = table.Rows[i];

                    // These values are never filled in by this crawler and are passed on empty.
                    string CredType = string.Empty, CorpCode = string.Empty, RegLevel = string.Empty, AuthorUnit = string.Empty, Profession = string.Empty, staffNum = string.Empty, IssuanceTime = string.Empty, Organ = string.Empty;
                    string Sex = string.Empty, CertCode = string.Empty, CorpName = string.Empty;

                    string Name       = tr.Columns[1].ToNodePlainString();
                    string IdNum      = tr.Columns[2].ToNodePlainString();
                    string CertGrade  = tr.Columns[3].ToNodePlainString();
                    string RegCode    = tr.Columns[4].ToNodePlainString();
                    string PersonType = tr.Columns[5].ToNodePlainString();

                    // Without a link there is no detail page to fetch; skip the row
                    // rather than dereferencing null.
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    string Url = "http://jzsc.mohurd.gov.cn" + aTag.Link;

                    string htmldtl      = string.Empty;
                    bool   detailLoaded = false;
                    for (int attempt = 1; attempt <= maxAttempts && !detailLoaded; attempt++)
                    {
                        try
                        {
                            if (attempt > 1)
                            {
                                Thread.Sleep(retryDelayMs);
                            }
                            htmldtl      = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8).GetJsString();
                            detailLoaded = true;
                        }
                        catch
                        {
                            // Try again until maxAttempts is reached.
                        }
                    }
                    if (!detailLoaded)
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "activeTinyTabContent")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                        Sex = ctx.GetRegex("性别");
                    }

                    // Rewind the parser so the same document can be scanned a second time.
                    parser.Reset();
                    dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "regcert_tab")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                        CertCode = ctx.GetRegex("证书编号");
                        ATag nameTag = dtlNode.GetATag(1);
                        if (nameTag != null)
                        {
                            CorpName = nameTag.LinkText.ToNodeString();
                        }
                    }

                    CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "全国", "", "中华人民共和国住房和城乡建设部建筑市场监管司", Url, Profession, staffNum, IssuanceTime, Organ, "");
                    ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);

                    // Pause for the retry delay after every batch of saved records.
                    count++;
                    if (count >= recordsPerPause)
                    {
                        count = 0;
                        Thread.Sleep(retryDelayMs);
                    }
                }
            }
            return(null);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Crawls the paged certificate listing at <c>SiteUrl</c>. For every row it
        /// reads the person page (for the sex field) and the certificate page, then
        /// saves one CorpStaff entity via ToolDb.SaveEntity.
        /// </summary>
        /// <param name="crawlAll">Not used by this method.</param>
        /// <returns>
        /// Always null, both when the first page cannot be downloaded and after a
        /// completed crawl; results are persisted rather than returned.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const string detailBaseUrl = "http://113.108.219.40/PlatForm/SearchCenter/";
            const int    pageSize      = 15;

            string html;
            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(null);
            }

            int      pageInt  = 1;
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_AspNetPager1")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The pager text carries the record count; convert it to a page count, rounding up.
                    string temp        = pageList[0].ToPlainTextString().GetRegexBegEnd("共", "条");
                    int    recordCount = int.Parse(temp);
                    pageInt = recordCount / pageSize;
                    if (recordCount % pageSize != 0)
                    {
                        pageInt++;
                    }
                }
                catch
                {
                    pageInt = 1;
                }
            }

            // A leftover "i = 500;" used to sit at the top of this loop. It forced every
            // iteration to page 500 and made the loop endless once pageInt reached 501.
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // The view state of the page currently held is posted to request the next one.
                        string viewState = ToolWeb.GetAspNetViewState(html);
                        NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__VIEWSTATE",
                            "ctl00$ContentPlaceHolder1$txtName",
                            "ctl00$ContentPlaceHolder1$txtIdNum",
                            "ctl00$ContentPlaceHolder1$txtEmpName",
                            "ctl00$ContentPlaceHolder1$txtEMP_ORG_CODE",
                            "ctl00$ContentPlaceHolder1$txtCertNum",
                            "ctl00$ContentPlaceHolder1$rdoIsDock"
                        }, new string[] {
                            "ctl00$ContentPlaceHolder1$AspNetPager1",
                            i.ToString(),
                            viewState,
                            "", "", "", "", "", "0"
                        });
                        html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip this page; falling through would parse the previous
                        // page's HTML again and save its rows a second time.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "dataTable")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The loop starts at index 1, so the first row is never processed.
                for (int j = 1; j < table.RowCount; j++)
                {
                    // These values are never filled in by this crawler and are passed on empty.
                    string CredType = string.Empty, IdNum = string.Empty, CorpCode = string.Empty, RegLevel = string.Empty, AuthorUnit = string.Empty, Profession = string.Empty;
                    string Sex = string.Empty;

                    TableRow tr         = table.Rows[j];
                    string   Name       = tr.Columns[1].ToNodePlainString();
                    string   RegCode    = tr.Columns[2].ToNodePlainString();
                    string   CertCode   = tr.Columns[3].ToNodePlainString();
                    string   PersonType = tr.Columns[4].ToNodePlainString();
                    string   CorpName   = tr.Columns[5].ToNodePlainString();
                    string   CertGrade  = tr.Columns[6].ToNodePlainString();
                    string   Url        = detailBaseUrl + tr.Columns[2].GetATagHref();
                    string   sexUrl     = detailBaseUrl + tr.Columns[1].GetATagHref();

                    try
                    {
                        string htl = ToolWeb.GetHtmlByUrl(sexUrl, Encoding.UTF8);
                        parser = new Parser(new Lexer(htl));
                        NodeList dtlList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        if (dtlList != null && dtlList.Count > 0)
                        {
                            string ctx = FlattenLabelValueTable(dtlList[0] as TableTag);
                            Sex = ctx.GetRegex(new string[] { "性别" });
                        }
                    }
                    catch
                    {
                        // Best effort: the record is still saved when the person page is unavailable.
                    }

                    string htldtl;
                    try
                    {
                        htldtl = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (dtList != null && dtList.Count > 0 && dtList[0] is TableTag)
                    {
                        string ctx          = FlattenLabelValueTable((TableTag)dtList[0]);
                        string IssuanceTime = ctx.GetRegex(new string[] { "签发日期", "日期" });
                        string CertState    = ctx.GetRegex(new string[] { "证书状态" });
                        string Organ        = ctx.GetRegex(new string[] { "发证机关" });
                        string staffNum     = CertGrade.GetLevel();

                        CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "广东地区", "广东省住房和城乡建设厅", Url, Profession, staffNum, IssuanceTime, Organ, CertState);
                        ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);
                    }
                }
            }
            return(null);
        }

        /// <summary>
        /// Joins the cells of a table into text: cells at even positions (0, 2, ...)
        /// have ":" characters removed and are followed by ":", cells at odd
        /// positions are followed by "\r\n".
        /// </summary>
        /// <param name="tab">Table to flatten; null yields an empty string.</param>
        /// <returns>The concatenated cell text.</returns>
        private static string FlattenLabelValueTable(TableTag tab)
        {
            if (tab == null)
            {
                return string.Empty;
            }

            StringBuilder ctx = new StringBuilder();
            for (int k = 0; k < tab.RowCount; k++)
            {
                for (int d = 0; d < tab.Rows[k].ColumnCount; d++)
                {
                    string cell = tab.Rows[k].Columns[d].ToNodePlainString();
                    if ((d + 1) % 2 == 0)
                    {
                        ctx.Append(cell).Append("\r\n");
                    }
                    else
                    {
                        // NOTE(review): the two Replace calls are identical as written;
                        // one presumably targeted a full-width colon originally - confirm.
                        ctx.Append(cell.Replace(":", "").Replace(":", "")).Append(":");
                    }
                }
            }
            return ctx.ToString();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Crawls the paged staff listing that starts at <c>SiteUrl</c>, downloads the
        /// detail page of every row and saves one CorpStaff entity per row via
        /// ToolDb.SaveEntity.
        /// </summary>
        /// <param name="crawlAll">Not used by this method.</param>
        /// <returns>
        /// Always null, both when the first page cannot be downloaded and after a
        /// completed crawl; results are persisted rather than returned.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html;
            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(null);
            }

            int      pageInt  = 1;
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_aspnetPager1")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList[0].ToPlainTextString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    pageInt = 1;
                }
            }

            // Built once instead of once per row: picks the "?id=..." part out of the row's onclick target.
            Regex regexLink = new Regex(@"\?id=[^&]+");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                        new string[] { "searchStr", "currentPage", "pageSize", "tab", "kind" },
                        new string[] { string.Empty, i.ToString(), "15", "4", "zyxx" }
                        );
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl("http://119.145.135.38/fscx/web/tab4List.do", nvc, Encoding.Default);
                    }
                    catch
                    {
                        // One unreachable page must not abort the whole crawl; the
                        // detail fetch below skips on failure in the same way.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table2")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                // When several tables match, the second one is used.
                TableTag table = (nodeList.Count > 1 ? nodeList[1] : nodeList[0]) as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The loop starts at index 1, so the first row is never processed.
                for (int j = 1; j < table.RowCount; j++)
                {
                    // These values are never filled in by this crawler and are passed on empty.
                    string IdNum = string.Empty, RegLevel = string.Empty, RegCode = string.Empty, AuthorUnit = string.Empty, Profession = string.Empty;
                    string Sex = string.Empty, CredType = string.Empty, CorpCode = string.Empty, CertGrade = string.Empty, PersonType = string.Empty, staffNum = string.Empty, Organ = string.Empty;

                    TableRow tr           = table.Rows[j];
                    string   Name         = tr.Columns[0].ToNodePlainString();
                    string   CorpName     = tr.Columns[1].ToNodePlainString();
                    string   CertCode     = tr.Columns[2].ToNodePlainString().Replace(".", "");
                    string   IssuanceTime = tr.Columns[3].ToPlainTextString().GetDateRegex();

                    string temp = tr.GetAttribute("onclick").GetRegexBegEnd("'", "'");
                    string ids  = regexLink.Match(temp).Value;
                    string Url  = "http://119.145.135.38/fscx/web/tab4Detail.do" + ids;

                    string htldtl;
                    try
                    {
                        htldtl = ToolWeb.GetHtmlByUrl(Url, Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }

                    // First table inside div#tabs-1: cells are joined as "cell:cell\r\n" pairs.
                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "tabs-1")), true), new TagNameFilter("table")));
                    if (dtList != null && dtList.Count > 0 && dtList[0] is TableTag)
                    {
                        TableTag      tab = (TableTag)dtList[0];
                        StringBuilder ctx = new StringBuilder();
                        for (int d = 0; d < tab.RowCount; d++)
                        {
                            for (int k = 0; k < tab.Rows[d].ColumnCount; k++)
                            {
                                ctx.Append(tab.Rows[d].Columns[k].ToNodePlainString());
                                ctx.Append((k + 1) % 2 == 0 ? "\r\n" : ":");
                            }
                        }
                        string pairs = ctx.ToString();
                        Sex        = pairs.GetRegex("性别");
                        CorpCode   = pairs.GetRegex("所在单位机构代码");
                        PersonType = pairs.GetRegex("专业");
                    }

                    // First table inside div#tabs1-1: rows whose first cell contains the
                    // certificate code from the listing supply the certificate fields.
                    parser = new Parser(new Lexer(htldtl));
                    NodeList cDtList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "tabs1-1")), true), new TagNameFilter("table")));
                    if (cDtList != null && cDtList.Count > 0 && cDtList[0] is TableTag)
                    {
                        TableTag tab = (TableTag)cDtList[0];
                        for (int k = 1; k < tab.RowCount; k++)
                        {
                            TableRow dr   = tab.Rows[k];
                            string   code = dr.Columns[0].ToNodePlainString();
                            if (!code.Contains(CertCode))
                            {
                                continue;
                            }

                            // CertCode is replaced by the matched cell text, so later rows
                            // are compared against that value.
                            CertCode  = code;
                            CredType  = dr.Columns[2].ToNodePlainString();
                            CertGrade = dr.Columns[3].ToNodePlainString();
                            string type = dr.Columns[4].ToNodePlainString();
                            if (!string.IsNullOrEmpty(type))
                            {
                                PersonType = type;
                            }
                            Organ    = dr.Columns[5].ToNodePlainString();
                            staffNum = CertGrade.GetLevel();
                        }
                    }

                    // "-" and "/" are placeholders, not real values.
                    if (PersonType == "-" || PersonType == "/")
                    {
                        PersonType = string.Empty;
                    }

                    CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "佛山市", "佛山市住房和城乡建设管理局", Url, Profession, staffNum, IssuanceTime, Organ, "");
                    ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);
                }
            }
            return(null);
        }
Ejemplo n.º 5
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList     list  = new List <CorpStaff>();
            int       count = 1;
            Hashtable has   = new Hashtable();

            has.Add("注册建造工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=2");
            has.Add("注册建筑工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=5");
            has.Add("注册结构工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=6");
            has.Add("注册监理工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=3");
            has.Add("水利监理工程师", "http://61.144.226.2:8001/web/sljlAction.do?method=getSljlList&pageSize=50");
            has.Add("注册造价工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=4");
            has.Add("小型项目负责人", "http://61.144.226.2:8001/web/xxxmAction.do?method=getXxxmList");
            has.Add("质量主任", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=7");
            has.Add("安全主任", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=8");
            has.Add("劳务队长", "http://61.144.226.2:8001/web/lwdzAction.do?method=getLwdzList");
            foreach (string item in has.Keys)
            {
                int    sqlCount        = 0;
                string htl             = string.Empty;
                string cookiestr       = string.Empty;
                string viewState       = string.Empty;
                int    pageInt         = 1;
                string eventValidation = string.Empty;
                string pageHtl         = string.Empty;
                try
                {
                    if (item == "小型项目负责人")
                    {
                        htl = ToolWeb.GetHtmlByUrl("http://61.144.226.2:8001/web/xxxmAction.do?pageSize=3000&page=1&backUrl=&page=136&method=getXxxmList&method=getXxxmList&personname=&personname=&orgName=&orgName=", Encoding.Default);
                    }
                    else
                    {
                        htl = ToolWeb.GetHtmlByUrl(has[item].ToString(), Encoding.Default);
                    }
                }
                catch
                {
                    continue;
                }
                Parser   parser   = new Parser(new Lexer(htl));
                NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
                if (pageNode != null && pageNode.Count > 0)
                {
                    try
                    {
                        string temp = pageNode.GetATagHref().GetRegexBegEnd("page=", "&");
                        pageInt = int.Parse(temp);
                    }
                    catch
                    {
                    }
                }
                for (int i = 1; i <= pageInt; i++)
                {
                    if (i > 1)
                    {
                        try
                        {
                            if (item != "小型项目负责人")
                            {
                                htl = ToolWeb.GetHtmlByUrl(has[item] + "&page=" + i.ToString(), Encoding.Default);
                            }
                            else
                            {
                                break;
                            }
                        }
                        catch
                        {
                            continue;
                        }
                    }
                    parser = new Parser(new Lexer(htl));
                    NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
                    if (nodeList != null && nodeList.Count > 0)
                    {
                        TableTag table = nodeList[0] as TableTag;
                        for (int j = 1; j < table.RowCount; j++)
                        {
                            string Name = string.Empty, Sex = string.Empty, CredType = string.Empty, IdNum = string.Empty, CorpName = string.Empty, CorpCode = string.Empty, CertCode = string.Empty, CertGrade = string.Empty, RegLevel = string.Empty, RegCode = string.Empty, AuthorUnit = string.Empty, PersonType = string.Empty, Province = string.Empty, City = string.Empty, CreateTime = string.Empty, InfoSource = string.Empty, Url = string.Empty, Profession = string.Empty, staffNum = string.Empty;

                            TableRow tr = table.Rows[j];

                            if (item.Contains("注册建造工程师") || item.Contains("注册建筑工程师") || item.Contains("注册结构工程师"))
                            {
                                Name      = tr.Columns[1].ToNodePlainString();
                                CorpName  = tr.Columns[2].ToNodePlainString();
                                CertCode  = tr.Columns[4].ToNodePlainString();
                                CertGrade = tr.Columns[5].ToNodePlainString();
                            }

                            if (item.Contains("水利监理工程师"))
                            {
                                Name       = tr.Columns[1].ToNodePlainString();
                                CertCode   = tr.Columns[3].ToNodePlainString();
                                Profession = tr.Columns[4].ToNodePlainString();
                            }

                            if (item.Contains("注册监理工程师") || item.Contains("注册造价工程师"))
                            {
                                Name     = tr.Columns[1].ToNodePlainString();
                                CorpName = tr.Columns[2].ToNodePlainString();
                                CertCode = tr.Columns[4].ToNodePlainString();
                            }

                            if (item.Contains("小型项目负责人"))
                            {
                                Name       = tr.Columns[1].ToNodePlainString();
                                CorpName   = tr.Columns[2].ToNodePlainString();
                                CertCode   = tr.Columns[4].ToNodePlainString();
                                Profession = tr.Columns[5].ToNodePlainString();
                            }

                            if (item.Contains("质量主任") || item.Contains("安全主任"))
                            {
                                Name     = tr.Columns[1].ToNodePlainString();
                                CorpName = tr.Columns[2].ToNodePlainString();
                            }
                            if (item.Contains("劳务队长"))
                            {
                                Name     = tr.Columns[1].ToNodePlainString();
                                CorpName = tr.Columns[2].ToNodePlainString();
                                CertCode = tr.Columns[4].ToNodePlainString();
                            }



                            PersonType = item;
                            string tempUrl = "http://61.144.226.2:8001/web/" + tr.Columns[1].GetATagValue("onclick").Replace("doView", "").Replace("(", "").Replace(")", "").Replace("'", "");
                            string htmldtl = string.Empty;
                            try
                            {
                                htmldtl = ToolWeb.GetHtmlByUrl(tempUrl, Encoding.Default);
                            }
                            catch { }
                            string ctx = string.Empty;
                            parser = new Parser(new Lexer(htmldtl.Replace("th", "td")));
                            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "infoTableL")));
                            if (dtlNode != null && dtlNode.Count > 0)
                            {
                                TableTag tableDtl = dtlNode[0] as TableTag;
                                for (int k = 0; k < tableDtl.RowCount; k++)
                                {
                                    for (int d = 0; d < tableDtl.Rows[k].ColumnCount; d++)
                                    {
                                        string temp = tableDtl.Rows[k].Columns[d].ToNodePlainString().Replace(":", "").Replace(":", "");
                                        if (d == 0)
                                        {
                                            ctx += temp += ":";
                                        }
                                        else
                                        {
                                            ctx += temp += "\r\n";
                                        }
                                    }
                                }
                            }
                            CorpCode = ctx.GetRegex("任职企业编号");
                            staffNum = CertGrade.GetLevel();

                            CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "深圳市", "深圳市住房和建设局", tempUrl, Profession, staffNum, "", "", "");
                            sqlCount++;
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                            ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);

                            count++;
                            if (count >= 100)
                            {
                                count = 1;
                                Thread.Sleep(480000);
                            }
                        }
                    }
                }
            }
            return(list);
        }