Example #1
0
        /// <summary>
        /// Back-fills the PrjCode column of NoticeInfo rows that have no project code yet,
        /// deriving the code from the notice body text (InfoCtx).
        /// </summary>
        /// <param name="crawlAll">Not used by this implementation.</param>
        /// <returns>An empty list; this pass only updates existing rows.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<NoticeInfo>();

            // Rows with a missing project code whose body is non-empty and is not one of the
            // two placeholder texts excluded below.
            string sql = "select Id,PrjCode,InfoCtx,InfoUrl from NoticeInfo where (PrjCode='' or PrjCode is null) "
                         + " and convert(varchar(max), InfoCtx) <> '见附件' and convert(varchar(max), InfoCtx)<>'详见附件' "
                         + " and datalength (InfoCtx)<>0 and datalength (InfoCtx) is not null";
            DataTable dt = ToolCoreDb.GetDbData(sql);

            if (dt == null || dt.Rows.Count == 0)
            {
                return(list);
            }

            foreach (DataRow row in dt.Rows)
            {
                string ctx     = Convert.ToString(row["InfoCtx"]);
                string prjCode = ctx.GetNoticePrjCode();
                if (string.IsNullOrEmpty(prjCode))
                {
                    // Fallback: take the text between the two labels and strip separators and whitespace.
                    // Both the ASCII and the full-width colon are removed.
                    prjCode = (ctx.GetRegexBegEnd("工程编号", "工程名称") ?? string.Empty)
                              .Replace(":", "").Replace("：", "")
                              .Replace("\r", "").Replace("\n", "").Replace(" ", "").Replace("\t", "");
                }

                // The value comes from scraped text, so double any single quote before embedding it
                // in the statement; otherwise a quote in the notice breaks (or injects into) the SQL.
                string safeCode = prjCode.Replace("'", "''");
                string safeId   = row["Id"].ToString().Replace("'", "''");
                string update   = "update NoticeInfo set PrjCode='" + safeCode + "' where Id='" + safeId + "'";
                ToolCoreDb.ExecuteSql(update);
            }
            return(list);
        }
Example #2
0
        /// <summary>
        /// Walks every page of the company list at <c>SiteUrl</c>, loads each company's detail page and
        /// saves a fresh CorpInfo record with up to three CorpLeader rows, then calls AddCorpQual,
        /// AddCorpTecStaff and GetOffAddress for it. A stored company with the same name and type is
        /// deleted (together with its child rows) before the new record is saved.
        /// </summary>
        /// <param name="crawlAll">Not used by this implementation.</param>
        /// <returns>Always null; results are written straight to the database.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            int    pageInt         = 1;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_AspNetPager1")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between the comma and the closing parenthesis of the
                    // last pager link; both separators are swapped for markers so it can be cut out.
                    string temp = pageNode[pageNode.Count - 1].GetATagHref().Replace("&#39;", "").Replace(")", "kdxx").Replace(",", "xxdk");
                    pageInt = int.Parse(temp.GetRegexBegEnd("xxdk", "kdxx"));
                }
                catch
                {
                    // Pager link not in the expected shape: keep the single-page default.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        if (i == 2)
                        {
                            // Captured once from the first page and reused for every later postback.
                            viewState       = ToolWeb.GetAspNetViewState(html);
                            eventValidation = ToolWeb.GetAspNetEventValidation(html);
                        }
                        NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                            new string[] {
                            "ctl00$ContentPlaceHolder1$ScriptManager1",
                            "ctl00$ContentPlaceHolder1$txtORGNAME",
                            "ctl00$ContentPlaceHolder1$txtORGCODE",
                            "ctl00$ContentPlaceHolder1$txtPNAME",
                            "ctl00$ContentPlaceHolder1$txtIDNUM",
                            "ctl00$ContentPlaceHolder1$txtHIREERORGNAME",
                            "ctl00$ContentPlaceHolder1$txtHIREERORGCODE",
                            "ctl00$ContentPlaceHolder1$ddlRegType",
                            "ctl00$ContentPlaceHolder1$ddlTitle",
                            "ctl00$ContentPlaceHolder1$ddlABC",
                            "ctl00$ContentPlaceHolder1$ddlCert",
                            "__VIEWSTATE",
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__EVENTVALIDATION",
                            "__ASYNCPOST"
                        },
                            new string[] {
                            "ctl00$ContentPlaceHolder1$UpdatePanel1|ctl00$ContentPlaceHolder1$AspNetPager1",
                            "", "", "", "", "", "", "", "", "", "",
                            viewState,
                            "ctl00$ContentPlaceHolder1$AspNetPager1",
                            i.ToString(),
                            eventValidation,
                            "true"
                        }
                            );

                        html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        // Best effort: a page that cannot be fetched is skipped.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (the loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 4)
                    {
                        // Columns 0-3 are all read below; a shorter row cannot be processed.
                        continue;
                    }

                    const string CorpType = "外地进粤企业";

                    // Only the fields assigned below are filled in; the rest are passed on empty.
                    string CorpCode = string.Empty, BusinessType = string.Empty, LinkPhone = string.Empty,
                           Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty,
                           ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;

                    string CorpName    = tr.Columns[0].ToNodePlainString();
                    string LinkMan     = tr.Columns[1].ToNodePlainString();
                    string techLeader  = tr.Columns[2].ToNodePlainString();
                    string localLeader = tr.Columns[3].ToNodePlainString();

                    // The detail page path is the first quoted argument of the OpenWin('...') handler.
                    string cUrl    = tr.Columns[0].GetATagValue("onclick").Replace("OpenWin('", "");
                    int    quoteAt = cUrl.IndexOf("'");
                    if (quoteAt > 0)
                    {
                        cUrl = "http://113.108.219.40/intogd/" + cUrl.Remove(quoteAt);
                    }

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8);
                    }
                    catch
                    {
                        // Best effort: skip companies whose detail page cannot be loaded.
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    TableTag dtlTable = dtlNode[0] as TableTag;
                    if (dtlTable == null)
                    {
                        continue;
                    }

                    // Flatten the detail table into "label:value" lines for the GetRegex lookups:
                    // "td-left" cells are followed by ':' and every other cell ends the line.
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int k = 0; k < dtlTable.RowCount; k++)
                    {
                        for (int d = 0; d < dtlTable.Rows[k].ColumnCount; d++)
                        {
                            TableColumn col = dtlTable.Rows[k].Columns[d];
                            ctxBuilder.Append(col.ToNodePlainString());
                            ctxBuilder.Append(col.GetAttribute("class") == "td-left" ? ":" : "\r\n");
                        }
                    }
                    string ctx = ctxBuilder.ToString();

                    string RegDate      = ctx.GetRegex("成立时间,注册时间").GetDateRegex();
                    string RegFund      = ctx.GetRegex("注册资本");
                    string BusinessCode = ctx.GetRegex("营业执照注册号");
                    string CorpAddress  = ctx.GetRegex("注册详细地址");
                    if (!string.IsNullOrEmpty(RegFund) && !RegFund.Contains("万"))
                    {
                        RegFund += "万";
                    }

                    CorpInfo corp = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, CorpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

                    // The name is scraped text: double single quotes so it cannot break the statement.
                    string strSql = string.Format(
                        "select Id from CorpInfo where CorpName='{0}' and CorpType='{1}'",
                        (corp.CorpName ?? string.Empty).Replace("'", "''"),
                        (corp.CorpType ?? string.Empty).Replace("'", "''"));
                    DataTable dt = ToolCoreDb.GetDbData(strSql);
                    if (dt != null && dt.Rows.Count > 0)
                    {
                        // Remove the stored company: child rows first, then the CorpInfo row itself.
                        string id = dt.Rows[0]["Id"].ToString();
                        foreach (string childTable in new string[] { "CorpInstitution", "CorpQual", "CorpLeader", "CorpTecStaff" })
                        {
                            ToolCoreDb.ExecuteSql(string.Format("delete from {0} where CorpId='{1}'", childTable, id));
                        }
                        ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
                    }

                    if (!ToolDb.SaveEntity(corp, this.ExistCompareFields))
                    {
                        continue;
                    }

                    if (!string.IsNullOrEmpty(LinkMan))
                    {
                        CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, LinkMan, "", "企业法定代表人", cUrl);
                        ToolDb.SaveEntity(leader, "");
                    }
                    if (!string.IsNullOrEmpty(techLeader))
                    {
                        CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, techLeader, "", "技术负责人", cUrl);
                        ToolDb.SaveEntity(leader, "");
                    }
                    if (!string.IsNullOrEmpty(localLeader))
                    {
                        CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, localLeader, "", "驻粤负责人", cUrl);
                        ToolDb.SaveEntity(leader, "");
                    }
                    AddCorpQual(corp, htmldtl);
                    AddCorpTecStaff(corp, htmldtl);
                    GetOffAddress(htmldtl, cUrl, corp);
                }
            }
            return(null);
        }
Example #3
0
        /// <summary>
        /// Walks every page of the company list at <c>SiteUrl</c>, loads each company's detail page and
        /// saves a fresh CorpInfo record, then calls the AddCorp* methods for it. A stored company with
        /// the same name, type and source is deleted (together with its child rows) first. After the
        /// crawl it runs <c>ToolCoreDb.ExecuteProcedure()</c> and updates AttenCorp.FkId from CorpInfo.
        /// </summary>
        /// <param name="crawlAll">Not used by this implementation.</param>
        /// <returns>Always null; results are written straight to the database.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    count   = 1;
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first page there is nothing to crawl.
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("style", "text-align:center;padding-bottom:10px;")));

            if (pageNode != null && pageNode.Count > 0)
            {
                string temp = pageNode[0].ToNodePlainString();
                try
                {
                    temp    = temp.GetRegexBegEnd("总页数", "页");
                    pageInt = int.Parse(temp.Replace(":", ""));
                }
                catch
                {
                    // Page count not found in the pager text: keep the single-page default.
                }
            }

            // Start at page 1: the first page reuses the HTML fetched above, later pages are requested
            // by number. (The loop previously began at a hard-coded page 320, which skipped earlier
            // pages and crawled nothing when the site had fewer pages.)
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "param", "corpType", "corp_name", "page" }, new string[] { "", "1", "", i.ToString() });
                    string pageHtml;
                    if (!TryFetchWithRetry(this.SiteUrl, nvc, out pageHtml))
                    {
                        continue;
                    }
                    html = pageHtml;
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (the loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 4)
                    {
                        // Columns 1-3 are read below; a shorter row cannot be processed.
                        continue;
                    }

                    // Only the fields assigned below are filled in; the rest are passed on empty.
                    string CorpAddress = string.Empty, RegDate = string.Empty, RegFund = string.Empty,
                           BusinessCode = string.Empty, BusinessType = string.Empty, LinkPhone = string.Empty,
                           Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty,
                           ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;

                    string CorpName = tr.Columns[1].ToNodePlainString();
                    string CorpCode = tr.Columns[2].ToNodePlainString();
                    string LinkMan  = tr.Columns[3].ToNodePlainString();
                    string href     = tr.Columns[1].GetATagHref();

                    string[]            postParams = null;
                    NameValueCollection dtlNvc     = null;
                    string              infoUrl    = "http://portal.szjs.gov.cn:8888/publicShow/corpDetail.html";

                    try
                    {
                        // The link has the form corpDetail('a','b'); strip the wrapper and split the arguments.
                        string temp = href.Replace("corpDetail", "").Replace("(", "").Replace(")", "").Replace("'", "");
                        postParams = temp.Split(',');
                        dtlNvc     = ToolWeb.GetNameValueCollection(new string[] { "param", "corpType", "orgCode" }, new string[] { postParams[0], "1", postParams[1] });
                        cUrl       = infoUrl + string.Format("?param={0}&corpType=1&orgCode={1}", postParams[0], CorpCode);
                    }
                    catch
                    {
                        // Link not in the expected shape: skip this row.
                        continue;
                    }

                    // A successful retry now supplies the detail HTML; the retried result used to be
                    // discarded, leaving an empty document to parse.
                    string htmldtl;
                    if (!TryFetchWithRetry(infoUrl, dtlNvc, out htmldtl))
                    {
                        continue;
                    }

                    // Blanket replace so header cells are read as columns. Note that it rewrites
                    // every "th" substring in the document, not only the tag names.
                    parser = new Parser(new Lexer(htmldtl.Replace("th", "td")));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
                    TableTag tabledtl = (dtlNode != null && dtlNode.Count > 0) ? dtlNode[0] as TableTag : null;
                    if (tabledtl != null)
                    {
                        // Flatten the detail table into "label:value" lines for the GetRegex lookups:
                        // the first cell of a row is followed by ':' and every other cell ends the line.
                        StringBuilder ctxBuilder = new StringBuilder();
                        for (int d = 0; d < tabledtl.RowCount; d++)
                        {
                            for (int k = 0; k < tabledtl.Rows[d].ColumnCount; k++)
                            {
                                ctxBuilder.Append(tabledtl.Rows[d].Columns[k].ToNodePlainString());
                                ctxBuilder.Append(k == 0 ? ":" : "\r\n");
                            }
                        }
                        string ctx = ctxBuilder.ToString();

                        LinkPhone   = ctx.GetRegex("联系电话");
                        Fax         = ctx.GetRegex("传真");
                        Email       = ctx.GetRegex("电子邮箱");
                        CorpAddress = ctx.GetRegex("注册地址");
                        RegFund     = ctx.GetRegex("注册资金");
                        RegDate     = ctx.GetRegex("设立时间");
                    }

                    // The record is built and saved even when no detail table was found.
                    CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, "建筑业企业", "广东省", "深圳市", "深圳市住房和建设局", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

                    // The name is scraped text: double single quotes so it cannot break the statement.
                    object obj = ToolDb.ExecuteScalar(string.Format(
                        "select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'",
                        (info.CorpName ?? string.Empty).Replace("'", "''"),
                        (info.CorpType ?? string.Empty).Replace("'", "''"),
                        (info.InfoSource ?? string.Empty).Replace("'", "''")));

                    // Delete results; this code treats -1 as "delete failed". They stay 0 when no
                    // stored record exists, so every related set is then re-added.
                    int qualCount = 0, leaderCount = 0, awardCount = 0, punishCount = 0, seclicCount = 0,
                        seclicstaffCount = 0, tecstaffCount = 0, resultCount = 0, infoCount = 0;

                    if (obj != null && obj.ToString() != "")
                    {
                        const string deleteByCorpId = "delete from {0} where CorpId='{1}'";
                        string       id             = obj.ToString();

                        // CorpCert and CorpDevice rows are left untouched.
                        qualCount        = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpQual", id));
                        leaderCount      = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpLeader", id));
                        awardCount       = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpAward", id));
                        punishCount      = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpPunish", id));
                        seclicCount      = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpSecLic", id));
                        seclicstaffCount = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpSecLicStaff", id));
                        tecstaffCount    = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpTecStaff", id));
                        resultCount      = ToolCoreDb.ExecuteSql(string.Format(deleteByCorpId, "CorpResults", id));
                        infoCount        = ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
                    }

                    // Save only when the old CorpInfo row was removed (or never existed), and re-add
                    // each related set only when its own delete did not fail.
                    if (infoCount != -1 && ToolDb.SaveEntity(info, string.Empty))
                    {
                        if (qualCount != -1)
                        {
                            AddCorpQual(info, postParams[0], "1");
                        }
                        if (awardCount != -1)
                        {
                            AddCorpAward(info, postParams[0], "1");
                        }
                        if (punishCount != -1)
                        {
                            AddCorpPunish(info, postParams[0], "1");
                        }
                        if (resultCount != -1)
                        {
                            AddCorpResults(info, postParams[0], "1");
                        }
                        if (seclicCount != -1)
                        {
                            AddCorpSecLic(info, postParams[0], "1");
                        }
                        if (seclicstaffCount != -1)
                        {
                            AddCorpSecLicStaff(info, postParams[0], "1");
                        }
                        if (tecstaffCount != -1)
                        {
                            AddCorpTecStaff(info, postParams[0], "1");
                        }
                        if (leaderCount != -1)
                        {
                            AddCorpLeader(info, postParams[0], "1");
                        }
                    }

                    // Throttle: pause for ten minutes each time the processed-row counter reaches 90.
                    count++;
                    if (count >= 90)
                    {
                        count = 1;
                        Thread.Sleep(10 * 60 * 1000);
                    }
                }
            }

            ToolCoreDb.ExecuteProcedure();
            string sql = "update a set a.FkId= c.Id FROM AttenCorp  a left join  CorpInfo c on c.CorpName=A.CorpName";

            ToolDb.ExecuteSql(sql);
            return(null);
        }

        /// <summary>
        /// Requests <paramref name="url"/> with the given form values, making up to three attempts.
        /// Each failed attempt is followed by a pause (12, 8 and 8 minutes respectively).
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="form">Name/value pairs sent with the request.</param>
        /// <param name="html">The response text on success; an empty string on failure.</param>
        /// <returns>True when one of the attempts succeeded.</returns>
        private bool TryFetchWithRetry(string url, NameValueCollection form, out string html)
        {
            int[] pauseAfterFailureMs = { 12 * 60 * 1000, 8 * 60 * 1000, 8 * 60 * 1000 };

            foreach (int pauseMs in pauseAfterFailureMs)
            {
                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, form, Encoding.UTF8);
                    return(true);
                }
                catch
                {
                    Thread.Sleep(pauseMs);
                }
            }

            html = string.Empty;
            return(false);
        }
Example #4
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    count = 1, totalCount = 1;
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;
            string pageHtl         = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.GetATagHref().GetRegexBegEnd("page=", "&");
                    pageInt = int.Parse(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + i.ToString(), Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    TableTag table = nodeList[0] as TableTag;
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty,
                               RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty,
                               BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty,
                               Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty,
                               ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty, Cert = string.Empty, ctxKc = string.Empty,
                               corpRz = string.Empty;

                        TableRow tr = table.Rows[j];
                        CorpName = tr.Columns[1].ToNodePlainString();
                        CorpCode = tr.Columns[2].ToNodePlainString();
                        LinkMan  = tr.Columns[3].ToNodePlainString();
                        string   href    = tr.Columns[1].GetATagValue("onclick");
                        string   htmldtl = string.Empty;
                        string[] url     = null;
                        try
                        {
                            string temp = href.Replace("doView", "").Replace("(", "").Replace(")", "").Replace("'", "");
                            url     = temp.Split(',');
                            cUrl    = "http://61.144.226.2:8001/web/enterprs/unitInfoAction.do?method=toView&qybh=" + url[0] + "&certType=1&orgcode=" + url[1];
                            htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
                        }
                        catch
                        {
                            continue;
                        }
                        parser = new Parser(new Lexer(htmldtl.Replace("th", "td")));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "infoTableL")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            TableTag tabledtl = dtlNode[0] as TableTag;
                            string   ctx      = string.Empty;
                            for (int d = 0; d < tabledtl.RowCount; d++)
                            {
                                for (int k = 0; k < tabledtl.Rows[d].ColumnCount; k++)
                                {
                                    string temp = tabledtl.Rows[d].Columns[k].ToNodePlainString();
                                    if (k == 0)
                                    {
                                        ctx += temp + ":";
                                    }
                                    else
                                    {
                                        ctx += temp + "\r\n";
                                    }
                                }
                            }
                            LinkPhone   = ctx.GetRegex("联系电话");
                            Fax         = ctx.GetRegex("传真");
                            Email       = ctx.GetRegex("电子邮箱");
                            CorpAddress = ctx.GetRegex("注册地址");
                            RegFund     = ctx.GetRegex("注册资金");
                            RegDate     = ctx.GetRegex("设立时间");
                        }

                        CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, "设计与施工一体化企业", "广东省", "深圳市", "深圳市住房和建设局", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

                        object obj = ToolDb.ExecuteScalar(string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'", info.CorpName, info.CorpType, info.InfoSource));
                        int    qualCount = 0, leaderCount = 0, awardCount = 0, certCount = 0, punishCount = 0, seclicCount = 0, seclicstaffCount = 0, tecstaffCount = 0, deviceCount = 0, resultCount = 0, infoCount = 0;
                        bool   isDel = false;
                        if (obj != null && obj.ToString() != "")
                        {
                            isDel = true;
                            string        id                 = obj.ToString();
                            StringBuilder delCorpQual        = new System.Text.StringBuilder();
                            StringBuilder delCorpLeader      = new System.Text.StringBuilder();
                            StringBuilder delCorpAward       = new System.Text.StringBuilder();
                            StringBuilder delCorpCert        = new System.Text.StringBuilder();
                            StringBuilder delCorpPunish      = new System.Text.StringBuilder();
                            StringBuilder delCorpSecLic      = new System.Text.StringBuilder();
                            StringBuilder delCorpSecLicStaff = new System.Text.StringBuilder();
                            StringBuilder delCorpDevice      = new System.Text.StringBuilder();
                            StringBuilder delCorpResults     = new System.Text.StringBuilder();
                            StringBuilder delCorpTecStaff    = new System.Text.StringBuilder();
                            delCorpQual.AppendFormat("delete from CorpQual where CorpId='{0}'", id);
                            delCorpLeader.AppendFormat("delete from CorpLeader where CorpId='{0}'", id);
                            delCorpAward.AppendFormat("delete from CorpAward where CorpId='{0}'", id);
                            delCorpCert.AppendFormat("delete from CorpCert where CorpId='{0}'", id);
                            delCorpPunish.AppendFormat("delete from CorpPunish where CorpId='{0}'", id);
                            delCorpSecLic.AppendFormat("delete from CorpSecLic where CorpId='{0}'", id);
                            delCorpSecLicStaff.AppendFormat("delete from CorpSecLicStaff where CorpId='{0}'", id);
                            delCorpTecStaff.AppendFormat("delete from CorpTecStaff where CorpId='{0}'", id);
                            delCorpDevice.AppendFormat("delete from CorpDevice where CorpId='{0}'", id);
                            delCorpResults.AppendFormat("delete from CorpResults where CorpId='{0}'", id);
                            qualCount        = ToolCoreDb.ExecuteSql(delCorpQual.ToString());
                            leaderCount      = ToolCoreDb.ExecuteSql(delCorpLeader.ToString());
                            awardCount       = ToolCoreDb.ExecuteSql(delCorpAward.ToString());
                            certCount        = ToolCoreDb.ExecuteSql(delCorpCert.ToString());
                            punishCount      = ToolCoreDb.ExecuteSql(delCorpPunish.ToString());
                            seclicCount      = ToolCoreDb.ExecuteSql(delCorpSecLic.ToString());
                            seclicstaffCount = ToolCoreDb.ExecuteSql(delCorpSecLicStaff.ToString());
                            tecstaffCount    = ToolCoreDb.ExecuteSql(delCorpTecStaff.ToString());
                            deviceCount      = ToolCoreDb.ExecuteSql(delCorpDevice.ToString());
                            resultCount      = ToolCoreDb.ExecuteSql(delCorpResults.ToString());
                            string corpSql = string.Format("delete from CorpInfo where Id='{0}'", id);
                            infoCount = ToolCoreDb.ExecuteSql(corpSql);
                        }
                        if (infoCount != -1 || !isDel)
                        {
                            if (ToolDb.SaveEntity(info, string.Empty))
                            {
                                if (isDel)
                                {
                                    if (qualCount != -1)
                                    {
                                        AddCorpQual(info, htmldtl);
                                    }
                                    if (awardCount != -1)
                                    {
                                        AddCorpAward(info, htmldtl);
                                    }
                                    if (certCount != -1)
                                    {
                                        AddCorpCert(info, htmldtl);
                                    }
                                    if (deviceCount != -1)
                                    {
                                        AddCorpDevice(info, htmldtl);
                                    }
                                    if (punishCount != -1)
                                    {
                                        AddCorpPunish(info, htmldtl);
                                    }
                                    if (resultCount != -1)
                                    {
                                        AddCorpResults(info, htmldtl);
                                    }
                                    if (seclicCount != -1)
                                    {
                                        AddCorpSecLic(info, htmldtl);
                                    }
                                    if (seclicstaffCount != -1)
                                    {
                                        AddCorpSecLicStaff(info, htmldtl);
                                    }
                                    if (tecstaffCount != -1)
                                    {
                                        AddCorpTecStaff(info, htmldtl);
                                    }
                                    if (leaderCount != -1)
                                    {
                                        AddCorpLeader(info, htmldtl);
                                    }
                                }
                                else
                                {
                                    AddCorpQual(info, htmldtl);
                                    AddCorpAward(info, htmldtl);
                                    AddCorpCert(info, htmldtl);
                                    AddCorpDevice(info, htmldtl);
                                    AddCorpPunish(info, htmldtl);
                                    AddCorpResults(info, htmldtl);
                                    AddCorpSecLic(info, htmldtl);
                                    AddCorpSecLicStaff(info, htmldtl);
                                    AddCorpTecStaff(info, htmldtl);
                                    AddCorpLeader(info, htmldtl);
                                }
                            }
                        }
                        count++;
                        totalCount++;
                        if (count >= 90)
                        {
                            count = 1;
                            Thread.Sleep(700000);
                        }
                    }
                }
            }
            ToolCoreDb.ExecuteProcedure();
            string sql = "update a set a.FkId= c.Id FROM AttenCorp  a left join  CorpInfo c on c.CorpName=A.CorpName";

            ToolDb.ExecuteSql(sql);
            return(null);
        }
Example #5
0
        /// <summary>
        /// Crawls the paged enterprise list exposed by the remote <c>web-enterprise!list.do</c> endpoint.
        /// For every listed enterprise the detail page is downloaded, a <c>CorpInfo</c> is built from its
        /// form fields and saved through <c>ToolDb.SaveEntity</c>, and then <c>AddCorpQual</c>,
        /// <c>AddCorpLeader</c> and <c>AddCorpStaff</c> are invoked for it. An already stored enterprise
        /// (same name, type and info source) is deleted and re-created.
        /// <c>ToolCoreDb.ExecuteProcedure()</c> is invoked once after all pages have been processed.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>Always <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string listUrl = "http://202.104.65.182:8081/G2/gfmweb/web-enterprise!list.do?data&filter_params_=enterpriseId,rowNum,enterpriseBaseId,enterpriseName,organizationCode&defined_operations_=&nocheck_operations_=&";

            // A single timestamp is computed up front and sent as the "nd" field of every page request.
            string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
            JavaScriptSerializer serializer = new JavaScriptSerializer();

            // The first page is required: without it neither the rows nor the page count are known.
            Dictionary<string, object> listJson;
            try
            {
                listJson = FetchEnterprisePage(listUrl, nd, 1, serializer);
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
                return null;
            }
            if (listJson == null)
            {
                return null;
            }

            // The "total" field drives the number of page requests.
            // NOTE(review): confirm it really is a page count rather than a record count.
            // A missing or non-numeric value falls back to processing only the page already fetched.
            int pageInt = 1;
            object totalRaw;
            int parsedTotal;
            if (listJson.TryGetValue("total", out totalRaw) && int.TryParse(Convert.ToString(totalRaw), out parsedTotal))
            {
                pageInt = parsedTotal;
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        listJson = FetchEnterprisePage(listUrl, nd, i, serializer);
                    }
                    catch (Exception ex)
                    {
                        // A single failing page must not abort the remaining pages.
                        Logger.Error(ex);
                        continue;
                    }
                }

                object rawRows;
                if (listJson == null || !listJson.TryGetValue("data", out rawRows))
                {
                    continue;
                }
                object[] objList = rawRows as object[];
                if (objList == null)
                {
                    continue;
                }

                foreach (object obj in objList)
                {
                    Dictionary<string, object> dic = obj as Dictionary<string, object>;
                    if (dic == null)
                    {
                        continue;
                    }

                    string corpName     = GetJsonField(dic, "enterpriseName");
                    string corpCode     = GetJsonField(dic, "organizationCode");
                    string enterpriseId = GetJsonField(dic, "enterpriseId");
                    string cUrl         = "http://202.104.65.182:8081/G2/webdrive/web-enterprise!view.do?enterpriseId=" + enterpriseId;

                    string htmldtl;
                    try
                    {
                        htmldtl = ToolWeb.GetHtmlByUrl(cUrl).GetJsString();
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex);
                        continue;
                    }

                    string corpAddress = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerAddress");
                    string regDate     = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerTime");
                    string regFund     = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseCapital");
                    if (!string.IsNullOrEmpty(regFund))
                    {
                        // The unit suffix is only appended when the page actually supplied a value.
                        regFund += "万元";
                    }
                    string businessCode = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseRegistrationCode");
                    string corpSite     = ToolHtml.GetHtmlInputValue(htmldtl, "_M.firmWebsite");
                    string linkMan      = ToolHtml.GetHtmlInputValue(htmldtl, "_M.name");
                    string email        = ToolHtml.GetHtmlInputValue(htmldtl, "_M.email");
                    string linkPhone    = ToolHtml.GetHtmlInputValue(htmldtl, "_M.tel");
                    string fax          = ToolHtml.GetHtmlInputValue(htmldtl, "_M.fax");

                    // The enterprise type is the comma-joined text of the matching cells, ignoring
                    // empty cells and cells whose text parses as a date.
                    string   corpType = string.Empty;
                    Parser   parser   = new Parser(new Lexer(htmldtl));
                    NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "g2-cell col-sm-6")));
                    if (typeNode != null && typeNode.Count > 0)
                    {
                        List<string> typeNames = new List<string>();
                        // Starts at index 2: the first two matching cells are never considered.
                        // NOTE(review): presumably they hold non-type fields; confirm against the page markup.
                        for (int j = 2; j < typeNode.Count; j++)
                        {
                            string   cellText = typeNode[j].ToNodePlainString();
                            DateTime ignoredDate;
                            if (string.IsNullOrEmpty(cellText) || DateTime.TryParse(cellText, out ignoredDate))
                            {
                                continue;
                            }
                            typeNames.Add(cellText);
                        }
                        corpType = string.Join(",", typeNames.ToArray());
                    }

                    // These fields are not available from this source and are stored empty.
                    string businessType  = string.Empty;
                    string isoQualNum    = string.Empty;
                    string isoEnvironNum = string.Empty;

                    CorpInfo info = ToolDb.GenCorpInfo(corpName, corpCode, corpAddress, regDate, regFund, businessCode, businessType, linkMan, linkPhone, fax, email, corpSite, corpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, isoQualNum, isoEnvironNum, string.Empty);

                    // Values scraped from the remote site are spliced into SQL text, so embedded
                    // single quotes are doubled to keep the statement well-formed.
                    string exisSql = string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'", EscapeSqlLiteral(info.CorpName), EscapeSqlLiteral(info.CorpType), EscapeSqlLiteral(info.InfoSource));

                    string corpId = Convert.ToString(ToolDb.ExecuteScalar(exisSql));

                    if (!string.IsNullOrEmpty(corpId))
                    {
                        // Known enterprise: remove the stored row and its dependent rows, then re-create them.
                        string quotedId = EscapeSqlLiteral(corpId);
                        int infoCount   = ToolDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", quotedId));
                        int qualCount   = ToolDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", quotedId));
                        int leaderCount = ToolDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", quotedId));
                        int staffCount  = ToolDb.ExecuteSql(string.Format("delete from CorpSecLicStaff where CorpId='{0}'", quotedId));

                        // NOTE(review): the dependent records below are re-added even when the CorpInfo row
                        // was not re-saved (infoCount <= 0 or SaveEntity returning false), unlike the
                        // new-enterprise branch which gates them on SaveEntity. Behaviour kept as-is; confirm intent.
                        if (infoCount > 0)
                        {
                            ToolDb.SaveEntity(info, "");
                        }
                        if (qualCount >= 0)
                        {
                            try
                            {
                                AddCorpQual(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                        }
                        if (leaderCount >= 0)
                        {
                            try
                            {
                                AddCorpLeader(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                        }
                        if (staffCount >= 0)
                        {
                            try
                            {
                                AddCorpStaff(info, enterpriseId);
                            }
                            catch (Exception ex) { Logger.Error(ex); }
                        }
                    }
                    else if (ToolDb.SaveEntity(info, ""))
                    {
                        // New enterprise: dependent records are only added once the main row is saved.
                        // Each step is isolated so one failure does not prevent the others.
                        try
                        {
                            AddCorpLeader(info, enterpriseId);
                        }
                        catch (Exception ex) { Logger.Error(ex); }
                        try
                        {
                            AddCorpQual(info, enterpriseId);
                        }
                        catch (Exception ex) { Logger.Error(ex); }
                        try
                        {
                            AddCorpStaff(info, enterpriseId);
                        }
                        catch (Exception ex) { Logger.Error(ex); }
                    }
                }
            }

            ToolCoreDb.ExecuteProcedure();
            return null;
        }

        /// <summary>
        /// Posts the enterprise-list search form for one page and deserializes the JSON response.
        /// </summary>
        /// <param name="listUrl">URL of the list endpoint.</param>
        /// <param name="nd">Value sent as the "nd" form field.</param>
        /// <param name="page">1-based page number sent as the "PAGE" form field.</param>
        /// <param name="serializer">Serializer used to parse the response body.</param>
        /// <returns>The response as a dictionary, or <c>null</c> when the response is not a JSON object.</returns>
        /// <remarks>Exceptions raised by the request or by deserialization propagate to the caller.</remarks>
        private static Dictionary<string, object> FetchEnterprisePage(string listUrl, string nd, int page, JavaScriptSerializer serializer)
        {
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection", "searchVal", "_enterpriseName_like", "entTypeCodes" },
                new string[] { "true", nd, "100", page.ToString(), "", "asc", "1", "公司", "" });

            string html = ToolWeb.GetHtmlByUrl(listUrl, nvc, Encoding.UTF8);
            return serializer.DeserializeObject(html) as Dictionary<string, object>;
        }

        /// <summary>
        /// Reads one field of a deserialized JSON row as text.
        /// </summary>
        /// <returns>The field converted with <c>Convert.ToString</c>, or an empty string when the key is absent.</returns>
        private static string GetJsonField(Dictionary<string, object> row, string key)
        {
            object value;
            return row.TryGetValue(key, out value) ? Convert.ToString(value) : string.Empty;
        }

        /// <summary>
        /// Converts a value to text and doubles every single quote so it can be placed inside a
        /// single-quoted SQL string literal.
        /// </summary>
        private static string EscapeSqlLiteral(object value)
        {
            return Convert.ToString(value).Replace("'", "''");
        }
Example #6
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return(null);
            }

            string opValue = string.Empty, leveVlaue = string.Empty;

            string[] levelNode = new string[] {
                "特级", "特级(旧标准)", "一级", "一级(旧标准)", "二级", "二级(旧标准)", "三级", "三级(旧标准)", "暂定三级(旧标准)", "不分等级"
            };
            Parser   parser   = new Parser(new Lexer(html));
            NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_ddlENT_SORT_ID")), true), new TagNameFilter("option")));

            if (typeNode != null && typeNode.Count > 0)
            {
                for (int t = 1; t < typeNode.Count; t++)
                {
                    for (int l = 1; l < levelNode.Length; l++)
                    {
                        leveVlaue = levelNode[l];

                        OptionTag opTag = typeNode[t] as OptionTag;
                        opValue = opTag.GetAttribute("value");
                        parser  = new Parser(new Lexer(html));
                        NodeList inputNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("input"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_ValidateCode1_txtRanNum")));
                        string   valiCode  = string.Empty;
                        if (inputNode != null && inputNode.Count > 0)
                        {
                            valiCode = (inputNode[0] as InputTag).GetAttribute("value");
                        }
                        viewState = ToolWeb.GetAspNetViewState(html);
                        NameValueCollection typeNvc = ToolWeb.GetNameValueCollection(
                            new string[] {
                            "ctl00_ContentPlaceHolder1_toolkitScriptManager1_HiddenField",
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__LASTFOCUS",
                            "__VIEWSTATE",
                            "ctl00$ContentPlaceHolder1$ddlENT_SORT_ID",
                            "ctl00$ContentPlaceHolder1$ddlRank",
                            "ctl00$ContentPlaceHolder1$txtEnt_name",
                            "ctl00$ContentPlaceHolder1$ValidateCode1$txtValidateCode",
                            "ctl00$ContentPlaceHolder1$ValidateCode1$txtRanNum",
                            "ctl00$ContentPlaceHolder1$btnsearch"
                        },
                            new string[] {
                            "", "", "", "",
                            viewState, opValue, leveVlaue, "", valiCode, valiCode,
                            "搜  索"
                        });
                        try
                        {
                            html = ToolWeb.GetHtmlByUrl(SiteUrl, typeNvc, Encoding.UTF8, ref cookiestr);
                        }
                        catch { continue; }

                        parser = new Parser(new Lexer(html));
                        NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_AspNetPager1")));
                        if (pageList != null && pageList.Count > 0)
                        {
                            try
                            {
                                string temp   = pageList[0].ToPlainTextString().GetRegexBegEnd("共", "条");
                                int    page   = int.Parse(temp);
                                int    result = page / 15;
                                if (page % 15 != 0)
                                {
                                    pageInt = result + 1;
                                }
                                else
                                {
                                    pageInt = result;
                                }
                            }
                            catch { pageInt = 1; }
                        }
                        for (int i = 1; i <= pageInt; i++)
                        {
                            if (i > 20)
                            {
                                break;
                            }
                            if (i > 1)
                            {
                                try
                                {
                                    parser = new Parser(new Lexer(html));
                                    NodeList pageInputNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("input"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_ValidateCode1_txtRanNum")));
                                    string   pageValiCode  = string.Empty;
                                    if (pageInputNode != null && pageInputNode.Count > 0)
                                    {
                                        pageValiCode = (pageInputNode[0] as InputTag).GetAttribute("value");
                                    }
                                    viewState = ToolWeb.GetAspNetViewState(html);
                                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                                        new string[] {
                                        "ctl00$ContentPlaceHolder1$ddlENT_SORT_ID",
                                        "ctl00$ContentPlaceHolder1$ddlRank",
                                        "ctl00$ContentPlaceHolder1$txtEnt_name",
                                        "ctl00$ContentPlaceHolder1$ValidateCode1$txtRanNum",
                                        "ctl00$ContentPlaceHolder1$ValidateCode1$txtValidateCode",
                                        "ctl00_ContentPlaceHolder1_toolkitScriptManager1_HiddenField",
                                        "__EVENTARGUMENT",
                                        "__EVENTTARGET",
                                        "__LASTFOCUS",
                                        "__VIEWSTATE"
                                    },
                                        new string[] {
                                        opValue,
                                        leveVlaue, "",
                                        pageValiCode,
                                        "", "",
                                        i.ToString(),
                                        "ctl00$ContentPlaceHolder1$AspNetPager1", "",
                                        viewState
                                    }
                                        );
                                    html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                                }
                                catch { continue; }
                            }
                            parser = new Parser(new Lexer(html));
                            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tab_ent")));
                            if (nodeList != null && nodeList.Count > 0)
                            {
                                #region 循环列表
                                TableTag table = nodeList[0] as TableTag;
                                for (int j = 1; j < table.RowCount; j++)
                                {
                                    string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty,
                                           RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty,
                                           BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty,
                                           Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty,
                                           ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, corpType = string.Empty,
                                           qualCode = string.Empty, corpMgr = string.Empty, businessMgr = string.Empty, tecMgr = string.Empty;
                                    string   htlCtx = string.Empty, QualType = string.Empty, CorpLevey = string.Empty;
                                    TableRow tr      = table.Rows[j];
                                    string   qualStr = tr.Columns[2].ToHtml();
                                    CorpName  = tr.Columns[1].ToNodePlainString();
                                    QualType  = tr.Columns[2].ToPlainTextString();
                                    CorpLevey = tr.Columns[3].ToNodePlainString();
                                    qualCode  = tr.Columns[4].ToNodePlainString();
                                    if (QualType == "--")
                                    {
                                        QualType = "";
                                    }
                                    cUrl = "http://113.108.219.40/PlatForm/SearchCenter/" + tr.Columns[1].GetATagHref();

                                    List <string> quaList = new List <string>();
                                    parser = new Parser(new Lexer(tr.Columns[4].ToHtml()));
                                    NodeList quaNodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                    if (quaNodeList != null && quaNodeList.Count > 0)
                                    {
                                        for (int q = 0; q < quaNodeList.Count; q++)
                                        {
                                            quaList.Add("http://113.108.219.40/PlatForm/SearchCenter/" + quaNodeList[q].GetATagHref());
                                        }
                                    }
                                    string quaUrl = "http://113.108.219.40/PlatForm/SearchCenter/" + tr.Columns[4].GetATagHref();
                                    string htldtl = string.Empty;
                                    try
                                    {
                                        htldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8);
                                    }
                                    catch { continue; }

                                    parser = new Parser(new Lexer(htldtl));
                                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                                    if (dtlList != null && dtlList.Count > 0)
                                    {
                                        TableTag tab = dtlList[0] as TableTag;
                                        string   ctx = string.Empty;
                                        for (int k = 0; k < tab.RowCount; k++)
                                        {
                                            for (int d = 0; d < tab.Rows[k].ColumnCount; d++)
                                            {
                                                if ((d + 1) % 2 == 0)
                                                {
                                                    ctx += tab.Rows[k].Columns[d].ToNodePlainString() + "\r\n";
                                                }
                                                else
                                                {
                                                    ctx += tab.Rows[k].Columns[d].ToNodePlainString().Replace(":", "").Replace(":", "") + ":";
                                                }
                                            }
                                        }


                                        corpType     = ctx.GetRegex(new string[] { "企业类型", "类型" });
                                        CorpAddress  = ctx.GetRegex(new string[] { "企业注册地址", "地址" });
                                        BusinessCode = ctx.GetRegex(new string[] { "营业执照注册号", "注册号" });
                                        RegDate      = ctx.GetRegex(new string[] { "成立时间", "成立日期", "时间", "日期" }).GetDateRegex();
                                        LinkMan      = ctx.GetRegex(new string[] { "企业法定代表人", "法定代表人" });
                                        RegFund      = ctx.GetRegex(new string[] { "注册资金", "资金" });
                                        if (!RegFund.Contains("万"))
                                        {
                                            RegFund += "万";
                                        }
                                        corpMgr = ctx.GetRegex(new string[] { "企业经理" });
                                        if (corpMgr.Contains("暂无"))
                                        {
                                            corpMgr = string.Empty;
                                        }
                                        businessMgr = ctx.GetRegex(new string[] { "经营负责人" });
                                        if (businessMgr.Contains("暂无"))
                                        {
                                            businessMgr = string.Empty;
                                        }
                                        tecMgr = ctx.GetRegex(new string[] { "技术负责人" });
                                        if (tecMgr.Contains("暂无"))
                                        {
                                            tecMgr = string.Empty;
                                        }

                                        CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);

                                        string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and Url='{1}'", info.CorpName, info.Url);
                                        object obj    = ToolDb.ExecuteScalar(strSql);
                                        if (obj != null && obj.ToString() != "")
                                        {
                                            StringBuilder delCorpQual   = new System.Text.StringBuilder();
                                            StringBuilder delCorpLeader = new System.Text.StringBuilder();
                                            delCorpQual.AppendFormat("delete from CorpQual where CorpId='{0}'", obj);
                                            delCorpLeader.AppendFormat("delete from CorpLeader where CorpId='{0}'", obj);
                                            ToolDb.ExecuteSql(delCorpQual.ToString());
                                            ToolDb.ExecuteSql(delCorpLeader.ToString());
                                            string corpSql = string.Format("delete from CorpInfo where Id='{0}'", obj);
                                            ToolCoreDb.ExecuteSql(corpSql);
                                        }

                                        if (ToolDb.SaveEntity(info, string.Empty))
                                        {
                                            if (!string.IsNullOrEmpty(LinkMan))
                                            {
                                                CorpLeader leader = ToolDb.GenCorpLeader(info.Id, LinkMan, "", "企业法定代表人", cUrl);
                                                ToolDb.SaveEntity(leader, string.Empty);
                                            }
                                            if (!string.IsNullOrEmpty(corpMgr))
                                            {
                                                CorpLeader leader = ToolDb.GenCorpLeader(info.Id, corpMgr, "", "企业经理", cUrl);
                                                ToolDb.SaveEntity(leader, string.Empty);
                                            }
                                            if (!string.IsNullOrEmpty(businessMgr))
                                            {
                                                CorpLeader leader = ToolDb.GenCorpLeader(info.Id, businessMgr, "", "经营负责人", cUrl);
                                                ToolDb.SaveEntity(leader, string.Empty);
                                            }
                                            if (!string.IsNullOrEmpty(tecMgr))
                                            {
                                                CorpLeader leader = ToolDb.GenCorpLeader(info.Id, tecMgr, "", "技术负责人", cUrl);
                                                ToolDb.SaveEntity(leader, string.Empty);
                                            }
                                            if (!string.IsNullOrEmpty(qualStr))
                                            {
                                                List <CorpQual> corpQuals = new List <CorpQual>();
                                                string          quaCtx    = string.Empty;
                                                for (int c = 0; c < quaList.Count; c++)
                                                {
                                                    string quaHtl = string.Empty;
                                                    try
                                                    {
                                                        quaHtl = ToolWeb.GetHtmlByUrl(quaList[c], Encoding.UTF8);
                                                    }
                                                    catch { }

                                                    parser = new Parser(new Lexer(quaHtl));
                                                    NodeList quaNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                                                    if (quaNode != null && quaNode.Count > 0)
                                                    {
                                                        TableTag quaTable = quaNode[0] as TableTag;

                                                        for (int k = 0; k < quaTable.RowCount; k++)
                                                        {
                                                            for (int d = 0; d < quaTable.Rows[k].ColumnCount; d++)
                                                            {
                                                                string temp = quaTable.Rows[k].Columns[d].ToNodePlainString();
                                                                //string quatemp = quaTable.Rows[k].ToNodePlainString();
                                                                if ((d + 1) % 2 == 0)
                                                                {
                                                                    quaCtx += temp + "\r\n";
                                                                }
                                                                else
                                                                {
                                                                    quaCtx += temp.Replace(":", "").Replace(":", "") + ":";
                                                                }
                                                            }
                                                        }
                                                    }
                                                    string qualctx = string.Empty;
                                                    parser.Reset();
                                                    NodeList spanNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lblQuaInfo")));
                                                    if (spanNode != null && spanNode.Count > 0)
                                                    {
                                                        qualctx = spanNode.ToHtml().GetReplace("<br/>,<br />,<br>,</br>", "∈").ToCtxString();
                                                    }
                                                    string str = qualctx.ToLower().Replace("<br/>", "∈").Replace("</br>", "∈").Replace("<br>", "∈");
                                                    str = Regex.Replace(str, "<[^>]*>", "");
                                                    string[] qual = str.Split('∈');
                                                    for (int q = 0; q < qual.Length; q++)
                                                    {
                                                        if (string.IsNullOrEmpty(qual[q]) || qual[q] == "--")
                                                        {
                                                            continue;
                                                        }
                                                        string CorpId = string.Empty, QualName = string.Empty, quaCode = string.Empty, QualSeq = string.Empty, qualNum = string.Empty, QualLevel = string.Empty, ValidDate = string.Empty, LicDate = string.Empty, LicUnit = string.Empty, quaType = string.Empty;
                                                        LicDate   = quaCtx.GetRegex("发证日期,发证时间").GetDateRegex();
                                                        LicUnit   = quaCtx.GetRegex("发证机关,发证机构");
                                                        ValidDate = quaCtx.GetRegex("证书有效期").GetDateRegex();
                                                        quaType   = quaCtx.GetRegex("证书类型");
                                                        string value = qual[q];
                                                        int    len   = value.IndexOf("/");
                                                        if (len != -1)
                                                        {
                                                            QualLevel = value.Substring(len, value.Length - len).Replace("/", "");
                                                            value     = value.Remove(len);
                                                        }
                                                        else
                                                        {
                                                            QualLevel = CorpLevey;
                                                        }
                                                        string[] dtl = value.Split(' ');
                                                        CorpId   = info.Id;
                                                        QualName = dtl[0].Trim();
                                                        if (string.IsNullOrEmpty(QualName))
                                                        {
                                                            QualName = dtl[dtl.Length - 1];
                                                        }
                                                        quaCode = quaCtx.GetRegex("证书编号");//qualCode;
                                                        for (int ty = 1; ty < dtl.Length; ty++)
                                                        {
                                                            quaType += dtl[ty].Trim() + ",";
                                                        }
                                                        if (!string.IsNullOrEmpty(quaType) && quaType.Contains(","))
                                                        {
                                                            quaType = quaType.Substring(0, quaType.Length - 1);
                                                            if (quaType[0] == ',' || quaType[0] == ',')
                                                            {
                                                                quaType = quaType.Substring(1, quaType.Length - 1);
                                                            }
                                                        }
                                                        qualNum = QualLevel.GetLevel();

                                                        CorpQual corpQual = null;
                                                        corpQual = ToolDb.GenCorpQual(info.Id, QualName, quaCode, QualSeq, quaType, QualLevel, ValidDate, LicDate, LicUnit, quaUrl, qualNum, "广东省", "广东地区");
                                                        ToolDb.SaveEntity(corpQual, string.Empty);
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                                #endregion
                            }
                        }
                    }
                }
            }
            ToolCoreDb.ExecuteProcedure();
            return(null);
        }
Example #7
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;
            string pageHtl         = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(null); }
            Parser   parser      = new Parser(new Lexer(html));
            NodeList enttypeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "selected2")), true), new TagNameFilter("option")));

            parser.Reset();
            NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "selected")), true), new TagNameFilter("option")));

            if (enttypeNode != null && enttypeNode.Count > 0 && typeNode != null && typeNode.Count > 0)
            {
                for (int t = 0; t < enttypeNode.Count; t++)
                {
                    string entTag  = (enttypeNode[t] as OptionTag).GetAttribute("value");
                    string entText = enttypeNode[t].ToNodePlainString();
                    for (int d = 0; d < typeNode.Count; d++)
                    {
                        string typeTag  = (typeNode[d] as OptionTag).GetAttribute("value");
                        string corpType = typeNode[d].ToNodePlainString();
                        if (t == 1 && d == 0)
                        {
                            typeTag = "16"; corpType = "房地产开发企业";
                        }
                        if (t == 1 && d == 1)
                        {
                            typeTag = "17"; corpType = "预拌商品混凝土企业";
                        }
                        if (t == 1 && d == 2)
                        {
                            typeTag = "19"; corpType = "建筑业施工企业";
                        }

                        try
                        {
                            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                                new string[] {
                                "pageMethod",
                                "method",
                                "selected2",
                                "selected",
                                "_state",
                                "keyword",
                                "currentPage",
                                "currentPage_temp"
                            },
                                new string[] {
                                "",
                                "searchHandBook",
                                entTag,
                                typeTag,
                                "1",
                                "", "1", "1"
                            });
                            html = ToolWeb.GetHtmlByUrl("http://www.dgjs.gov.cn/dgweb/search.do", nvc, Encoding.UTF8, ref cookiestr);
                        }
                        catch { continue; }

                        parser = new Parser(new Lexer(html));
                        NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "mainNextPage")));
                        if (pageNode != null && pageNode.Count > 0)
                        {
                            try
                            {
                                string temp = pageNode.AsString().GetRegexBegEnd("/", "页").Replace("\r", "").Replace("\t", "").Replace("\n", "");
                                pageInt = int.Parse(temp);
                            }
                            catch { }
                        }
                        for (int i = 1; i <= pageInt; i++)
                        {
                            if (i > 1)
                            {
                                try
                                {
                                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                                        new string[] {
                                        "pageMethod",
                                        "method",
                                        "selected2",
                                        "selected",
                                        "_state",
                                        "keyword",
                                        "currentPage",
                                        "currentPage_temp",
                                    },
                                        new string[] {
                                        "next",
                                        "searchHandBook",
                                        entTag,
                                        typeTag,
                                        "1",
                                        "", (i - 1).ToString(), i.ToString()
                                    });
                                    html = ToolWeb.GetHtmlByUrl("http://www.dgjs.gov.cn/dgweb/search.do", nvc, Encoding.UTF8, ref cookiestr);
                                }
                                catch { continue; }
                            }
                            parser = new Parser(new Lexer(html));
                            NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "center")), true), new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "jsxmtb"))));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                TableTag table = tableNode[0] as TableTag;
                                for (int j = 2; t == 1 ? j <= table.RowCount : j < table.RowCount; j++)
                                {
                                    string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty,
                                           RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty,
                                           BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty,
                                           Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty,
                                           ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, CorpLevey = string.Empty;

                                    TableRow tr = null;

                                    #region 信用手册
                                    if (entText.Contains("手册"))
                                    {
                                        tr          = table.Rows[j];
                                        CorpName    = tr.Columns[1].ToNodePlainString();
                                        LinkMan     = tr.Columns[3].ToNodePlainString();
                                        CorpAddress = tr.Columns[5].ToNodePlainString();
                                        CorpLevey   = tr.Columns[2].ToNodePlainString();
                                        if (corpType.Contains("担保企业"))
                                        {
                                            cUrl = "http://www.dgjs.gov.cn/dgweb/" + tr.Columns[10].GetATagHref();
                                        }
                                        else
                                        {
                                            cUrl = "http://www.dgjs.gov.cn/dgweb/" + tr.Columns[9].GetATagHref();
                                        }
                                        string htlDtl = string.Empty;
                                        try
                                        {
                                            htlDtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8).GetJsString();
                                        }
                                        catch { continue; }

                                        parser = new Parser(new Lexer(htlDtl.Replace("th", "td").Replace("TH", "TD")));
                                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dgjsj")), true), new TagNameFilter("table")));
                                        if (dtlNode != null && dtlNode.Count > 1)
                                        {
                                            string   ctx      = string.Empty;
                                            TableTag dtlTable = dtlNode[0] as TableTag;
                                            for (int c = 1; c < dtlTable.RowCount; c++)
                                            {
                                                for (int v = 0; v < dtlTable.Rows[c].ColumnCount; v++)
                                                {
                                                    if (string.IsNullOrEmpty(dtlTable.Rows[c].Columns[v].ToNodePlainString()))
                                                    {
                                                        continue;
                                                    }
                                                    if ((v + 1) % 2 == 0)
                                                    {
                                                        ctx += dtlTable.Rows[c].Columns[v].ToNodePlainString() + "\r\n";
                                                    }
                                                    else
                                                    {
                                                        ctx += dtlTable.Rows[c].Columns[v].ToNodePlainString() + ":";
                                                    }
                                                }
                                            }

                                            RegDate      = ctx.GetRegex("设立时间,设立日期");
                                            LinkPhone    = ctx.GetRegex("联系电话");
                                            Fax          = ctx.GetRegex("传真");
                                            Email        = ctx.GetRegex("电子邮箱");
                                            BusinessType = ctx.GetRegex("经济性质");
                                            BusinessCode = ctx.GetRegex("营业执照注册号");
                                        }
                                        CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "东莞市", "东莞市住房和城乡建设局", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);
                                        if (!string.IsNullOrEmpty(CorpName.GetNotChina()))
                                        {
                                            string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and InfoSource='{1}' and CorpType='{2}'", info.CorpName, info.InfoSource, info.CorpType);
                                            object obj    = ToolDb.ExecuteScalar(strSql);
                                            if (obj != null && obj.ToString() != "")
                                            {
                                                StringBuilder delCorpQual     = new System.Text.StringBuilder();
                                                StringBuilder delCorpLeader   = new System.Text.StringBuilder();
                                                StringBuilder delCorpTecStaff = new System.Text.StringBuilder();
                                                delCorpQual.AppendFormat("delete from CorpQual where CorpId='{0}'", obj);
                                                delCorpLeader.AppendFormat("delete from CorpLeader where CorpId='{0}'", obj);
                                                delCorpTecStaff.AppendFormat("delete from CorpTecStaff where CorpId='{0}'", obj);
                                                ToolDb.ExecuteSql(delCorpQual.ToString());
                                                ToolDb.ExecuteSql(delCorpLeader.ToString());
                                                ToolDb.ExecuteSql(delCorpTecStaff.ToString());
                                                string corpSql = string.Format("delete from CorpInfo where Id='{0}'", obj);
                                                ToolCoreDb.ExecuteSql(corpSql);
                                            }
                                            if (ToolDb.SaveEntity(info, string.Empty))
                                            {
                                                object corpId = ToolDb.ExecuteScalar("select Id from CorpInfo where Url='" + info.Url + "' and InfoSource='东莞市住房和城乡建设局' ");

                                                ToolDb.ExecuteSql("delete from CorpQual where CorpId='" + corpId + "'");

                                                #region 企业资质
                                                TableTag quaTable = dtlNode[1] as TableTag;
                                                for (int q = 2; q < quaTable.RowCount; q++)
                                                {
                                                    TableRow quaTr = quaTable.Rows[q];
                                                    string   CorpId = string.Empty, QualName = string.Empty, QualCode = string.Empty, QualSeq = string.Empty, qualNum = string.Empty, QualType = string.Empty, QualLevel = string.Empty, ValidDate = string.Empty, LicDate = string.Empty, LicUnit = string.Empty;
                                                    CorpId    = info.Id;
                                                    QualName  = quaTr.Columns[0].ToNodePlainString();
                                                    QualLevel = quaTr.Columns[1].ToNodePlainString();
                                                    QualCode  = quaTr.Columns[5].ToNodePlainString();
                                                    LicUnit   = quaTr.Columns[6].ToNodePlainString();
                                                    QualType  = quaTr.Columns[0].ToNodePlainString();
                                                    ValidDate = quaTr.Columns[3].ToPlainTextString().GetDateRegex();
                                                    qualNum   = QualLevel.GetLevel();

                                                    CorpQual qual = ToolDb.GenCorpQual(CorpId, QualName, QualCode, QualSeq, QualType, QualLevel, ValidDate, LicDate, LicUnit, cUrl, qualNum, "广东省", "东莞市");

                                                    ToolDb.SaveEntity(qual, "");
                                                }
                                                #endregion

                                                #region 企业负责人
                                                parser = new Parser(new Lexer(htlDtl));
                                                NodeList leaderNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "head2")));
                                                if (leaderNode != null && leaderNode.Count > 0)
                                                {
                                                    ToolDb.ExecuteSql("delete from CorpLeader where CorpId='" + corpId + "'");
                                                    ATag leaderTag = leaderNode.GetATag(1);
                                                    if (!leaderTag.LinkText.Contains("负责人"))
                                                    {
                                                        leaderTag = leaderNode.GetATag(2);
                                                    }
                                                    if (!leaderTag.LinkText.Contains("负责人"))
                                                    {
                                                        leaderTag = leaderNode.GetATag(3);
                                                    }
                                                    if (!leaderTag.LinkText.Contains("负责人"))
                                                    {
                                                        leaderTag = leaderNode.GetATag(4);
                                                    }
                                                    if (leaderTag.LinkText.Contains("负责人"))
                                                    {
                                                        string leaderUrl = "http://www.dgjs.gov.cn/dgweb/" + leaderTag.Link;
                                                        string leaderDtl = string.Empty;
                                                        try
                                                        {
                                                            leaderDtl = ToolWeb.GetHtmlByUrl(leaderUrl, Encoding.UTF8).GetJsString();
                                                        }
                                                        catch { }

                                                        parser = new Parser(new Lexer(leaderDtl));
                                                        NodeList leaderDtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dgjsj")), true), new TagNameFilter("table")));

                                                        if (leaderDtlNode != null && leaderDtlNode.Count > 0)
                                                        {
                                                            TableTag leaderTable = leaderDtlNode[0] as TableTag;
                                                            for (int l = 3; l < leaderTable.RowCount; l++)
                                                            {
                                                                TableRow leaderTr = leaderTable.Rows[l];

                                                                if (leaderTr.ToHtml().ToLower().Contains("none"))
                                                                {
                                                                    continue;
                                                                }
                                                                string LeaderName = string.Empty, LeaderDuty = string.Empty, LeaderType = string.Empty, htlCtx = string.Empty;
                                                                try
                                                                {
                                                                    LeaderName = leaderTr.Columns[0].ToNodePlainString();
                                                                    LeaderDuty = leaderTr.Columns[4].ToNodePlainString();

                                                                    LeaderType = leaderTr.Columns[1].ToNodePlainString();
                                                                }
                                                                catch
                                                                { }
                                                                if (!string.IsNullOrEmpty(LeaderName))
                                                                {
                                                                    CorpLeader corpLeader = ToolDb.GenCorpLeader(info.Id, LeaderName, LeaderDuty, LeaderType, leaderUrl);
                                                                    ToolDb.SaveEntity(corpLeader, string.Empty);
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                                #endregion

                                                #region 企业技术力量
                                                parser = new Parser(new Lexer(htlDtl));
                                                NodeList tecNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "head2")));
                                                if (tecNode != null && tecNode.Count > 0)
                                                {
                                                    ToolDb.ExecuteSql("delete from CorpTecStaff where CorpId='" + corpId + "'");
                                                    ATag leaderTag = tecNode.GetATag(1);
                                                    if (!leaderTag.LinkText.Contains("技术"))
                                                    {
                                                        leaderTag = tecNode.GetATag(2);
                                                    }
                                                    if (!leaderTag.LinkText.Contains("技术"))
                                                    {
                                                        leaderTag = tecNode.GetATag(3);
                                                    }
                                                    if (!leaderTag.LinkText.Contains("技术"))
                                                    {
                                                        leaderTag = tecNode.GetATag(4);
                                                    }
                                                    if (!leaderTag.LinkText.Contains("技术"))
                                                    {
                                                        leaderTag = tecNode.GetATag(5);
                                                    }
                                                    if (leaderTag.LinkText.Contains("技术"))
                                                    {
                                                        string leaderUrl = "http://www.dgjs.gov.cn/dgweb/" + leaderTag.Link;
                                                        string leaderDtl = string.Empty;
                                                        try
                                                        {
                                                            leaderDtl = ToolWeb.GetHtmlByUrl(leaderUrl, Encoding.UTF8).GetJsString();
                                                        }
                                                        catch { }

                                                        parser = new Parser(new Lexer(leaderDtl));
                                                        NodeList leaderDtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "maintable")));

                                                        if (leaderDtlNode != null && leaderDtlNode.Count > 0)
                                                        {
                                                            TableTag leaderTable = leaderDtlNode[0] as TableTag;
                                                            for (int l = 2; l < leaderTable.RowCount - 1; l++)
                                                            {
                                                                TableRow leaderTr = leaderTable.Rows[l];

                                                                string StaffName = string.Empty, IdCard = string.Empty, CertLevel = string.Empty, CertNo = string.Empty, stffType = string.Empty;
                                                                try
                                                                {
                                                                    StaffName = leaderTr.Columns[1].ToNodePlainString();
                                                                    stffType  = leaderTr.Columns[6].ToNodePlainString();
                                                                    if (stffType == "/")
                                                                    {
                                                                        stffType = null;
                                                                    }
                                                                    CertNo = leaderTr.Columns[8].ToNodePlainString();
                                                                }
                                                                catch { }
                                                                if (!string.IsNullOrEmpty(StaffName))
                                                                {
                                                                    CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, StaffName, IdCard, CertLevel, CertNo, leaderUrl, stffType);
                                                                    ToolDb.SaveEntity(staff, string.Empty);
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                                #endregion
                                            }
                                        }
                                    }
                                    #endregion

                                    #region 资质证书企业
                                    else
                                    {
                                        tr = table.Rows[j - 1];
                                        try
                                        {
                                            CorpName    = tr.Columns[0].ToNodePlainString();
                                            CorpAddress = tr.Columns[1].ToNodePlainString();
                                            LinkMan     = tr.Columns[2].ToNodePlainString();
                                            CorpInfo info1 = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "东莞市", "东莞市住房和城乡建设局", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);
                                            if (!string.IsNullOrEmpty(CorpName.GetNotChina()))
                                            {
                                                string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and InfoSource='{1}' and CorpType='{2}'", info1.CorpName, info1.InfoSource, info1.CorpType);
                                                object obj    = ToolDb.ExecuteScalar(strSql);
                                                if (obj != null && obj.ToString() != "")
                                                {
                                                    string corpSql = string.Format("delete from CorpInfo where Id='{0}'", obj);
                                                    ToolCoreDb.ExecuteSql(corpSql);
                                                }
                                                ToolDb.SaveEntity(info1, string.Empty);
                                            }
                                        }
                                        catch (Exception ex) { }
                                    }
                                    #endregion
                                }
                            }
                        }
                    }
                }
            }
            return(null);
        }
Example #8
0
        /// <summary>
        /// Crawls the paged company listing at <c>SiteUrl</c>. For every listing row it downloads the
        /// company's credit-info detail page, extracts the registration fields from the page's
        /// JavaScript, and stores the company through <c>AddCorpInfo</c>, first deleting a previously
        /// stored record that matches on name, type, code and information source.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>
        /// The list created at the start of the method. Nothing is ever added to it; all results are
        /// persisted as a side effect.
        /// </returns>
        protected override System.Collections.IList ExecuteCrawl(bool crawlAll)
        {
            // Cells 2, 3, 5 and 6 are read from every listing row, so a usable row has at least 7 cells.
            const int MinListingColumns = 7;
            // After this many processed companies the crawl sleeps before going on
            // (presumably to stay under a request limit of the remote site - TODO confirm).
            const int CompaniesPerBatch      = 90;
            const int BatchPauseMilliseconds = 11 * 60 * 1000;

            IList  list    = new List <CorpInfo>();
            string html    = string.Empty;
            int    pageInt = 1, sqlCount = 0;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "TABLEPANE")));

            if (pageNode != null && pageNode.Count > 0)
            {
                // Best effort: when the page count cannot be extracted or parsed, only page 1 is crawled.
                string pageText = string.Empty;
                try
                {
                    pageText = pageNode.AsString().GetRegexBegEnd("共", "页");
                }
                catch { }

                int parsedPages;
                if (int.TryParse(pageText, out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                // Page 1 was already downloaded above; later pages are requested with a "pages" query argument.
                if (i > 1)
                {
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&pages=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "resultset")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row of the table are not visited
                // (presumably header and pager rows - TODO confirm).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < MinListingColumns)
                    {
                        // A short row would otherwise throw on the cell reads below and abort the whole crawl.
                        continue;
                    }

                    string CorpName = tr.Columns[2].ToNodePlainString();
                    if (string.IsNullOrWhiteSpace(CorpName))
                    {
                        continue;
                    }

                    string CorpCode  = tr.Columns[3].ToNodePlainString();
                    string corpType  = "物业管理企业";
                    string LinkMan   = tr.Columns[5].ToNodePlainString();
                    string LinkPhone = tr.Columns[6].ToNodePlainString();
                    string cUrl      = "http://61.144.226.3:92/WyjgInsideWeb/e-business/prg/signup/CreditInfoBase.jsp?EventID=CreditInfo&QYFRDM=" + CorpCode;

                    // These fields are not available from this source and are stored empty.
                    string Fax = string.Empty, ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
                    }
                    catch { continue; }

                    // The detail values are read out of "<field>.value = '...';" assignments in the page's scripts.
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("SCRIPT"), new HasAttributeFilter("language", "javascript")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string ctx          = dtlNode.AsString();
                    string CorpAddress  = ctx.GetRegexBegEnd("qyxxdz.value", ";").GetReplace(new string[] { "=", "'" });
                    string CorpSite     = ctx.GetRegexBegEnd("qywz.value", ";").GetReplace(new string[] { "=", "'" });
                    string Email        = ctx.GetRegexBegEnd("dzxx.value", ";").GetReplace(new string[] { "=", "'" });
                    string BusinessCode = ctx.GetRegexBegEnd("yyzzzch.value", ";").GetReplace(new string[] { "=", "'" });
                    string RegFund      = ctx.GetRegexBegEnd("zczb.value", ";").GetReplace(new string[] { "=", "'", "人民币" });
                    string RegDate      = ctx.GetRegexBegEnd("qyclsj.value", ";").GetReplace(new string[] { "=", "'" });
                    string BusinessType = ctx.GetRegexBegEnd("qydjzclx.value", ";").GetReplace(new string[] { "=", "'" });

                    CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "深圳市", "深圳市住房和建设局", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

                    // The values come from scraped HTML, so quotes are doubled before they are placed in SQL literals.
                    string findSql = string.Format(
                        "select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and CorpCode='{2}' and InfoSource='{3}'",
                        EscapeSqlLiteral(info.CorpName), EscapeSqlLiteral(info.CorpType), EscapeSqlLiteral(info.CorpCode), EscapeSqlLiteral(info.InfoSource));
                    string result = Convert.ToString(ToolDb.ExecuteScalar(findSql));

                    if (string.IsNullOrEmpty(result))
                    {
                        AddCorpInfo(info, ctx);
                    }
                    else
                    {
                        // NOTE(review): the company row is only deleted and re-added when the CorpQual delete
                        // reports a value above 0. If ExecuteSql returns the affected-row count, a stored
                        // company without CorpQual rows is never refreshed - confirm whether that is intended.
                        string delQual   = string.Format("delete from CorpQual where CorpId='{0}'", result);
                        string delCorp   = string.Format("delete from CorpInfo where Id='{0}'", result);
                        int    delResult = 0;
                        if (ToolCoreDb.ExecuteSql(delQual) > 0)
                        {
                            delResult = ToolCoreDb.ExecuteSql(delCorp);
                        }
                        if (delResult > 0)
                        {
                            AddCorpInfo(info, ctx);
                        }
                    }

                    sqlCount++;
                    if (sqlCount >= CompaniesPerBatch)
                    {
                        sqlCount = 0;
                        Thread.Sleep(BatchPauseMilliseconds);
                    }
                }
            }
            return(list);
        }

        /// <summary>
        /// Doubles single quotes so that <paramref name="value"/> can be embedded in a quoted SQL string literal.
        /// A null value is treated as an empty string.
        /// </summary>
        private static string EscapeSqlLiteral(string value)
        {
            return (value ?? string.Empty).Replace("'", "''");
        }
Example #9
0
        /// <summary>
        /// Crawls the paged company listing at <c>SiteUrl</c>. For every listing row it downloads the
        /// company's detail page, builds a <c>CorpInfo</c> from the label/value table in the "tabs-1"
        /// div, replaces any stored record with the same name and information source (including its
        /// CorpQual, CorpResults, CorpSecLic and CorpPunish rows), and then imports the qualification,
        /// safety-licence, results and punishment tables of the detail page.
        /// Calls <c>ToolCoreDb.ExecuteProcedure()</c> once after all pages have been processed.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>Always null; all results are persisted as a side effect.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_aspnetPager1")));

            if (pageList != null && pageList.Count > 0)
            {
                // Best effort: when the page count cannot be extracted or parsed, only page 1 is crawled.
                string pageText = string.Empty;
                try
                {
                    pageText = pageList[0].ToPlainTextString().GetRegexBegEnd("/", "页");
                }
                catch { }

                int parsedPages;
                if (int.TryParse(pageText, out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }

            // Loop invariants: the id extractor and the child tables cleared before a company is re-saved.
            Regex    regexLink   = new Regex(@"\?id=[^&]+");
            string[] childTables = new string[] { "CorpQual", "CorpResults", "CorpSecLic", "CorpPunish" };

            for (int i = 1; i <= pageInt; i++)
            {
                // Page 1 was already downloaded above; later pages are requested by form post.
                if (i > 1)
                {
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                        new string[] { "searchStr", "currentPage", "pageSize", "tab", "kind" },
                        new string[] { string.Empty, i.ToString(), "15", "2", "zzxx" }
                        );
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl("http://119.145.135.38/fscx/web/tab3List.do", nvc, Encoding.Default);
                    }
                    catch
                    {
                        // A single failed page must not abort the crawl or skip the final ExecuteProcedure call.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table2")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                // When more than one table matches, the second one is used as the listing.
                TableTag table = (nodeList.Count > 1 ? nodeList[1] : nodeList[0]) as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is not visited (presumably the header row - TODO confirm).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 2)
                    {
                        // Cells 0 and 1 are read below; a shorter row would throw and abort the crawl.
                        continue;
                    }

                    string CorpName = tr.Columns[0].ToNodePlainString();
                    string CorpType = tr.Columns[1].ToNodePlainString();

                    // The row's onclick handler carries the detail link; only its "?id=..." part is reused.
                    string onclick = tr.GetAttribute("onclick");
                    if (string.IsNullOrEmpty(onclick))
                    {
                        continue;
                    }
                    string temp = onclick.GetRegexBegEnd("'", "'");
                    string ids  = regexLink.Match(temp ?? string.Empty).Value;
                    string cUrl = "http://119.145.135.38/fscx/web/tab3Detail.do" + ids;

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
                    }
                    catch { continue; }

                    TableTag tab = FindFirstTableInDiv(htldtl, "tabs-1");
                    if (tab == null)
                    {
                        continue;
                    }

                    // Flatten the detail table into "label:value" lines: even-indexed cells are followed
                    // by ":", odd-indexed cells by a line break.
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int d = 0; d < tab.RowCount; d++)
                    {
                        TableRow detailRow = tab.Rows[d];
                        for (int k = 0; k < detailRow.ColumnCount; k++)
                        {
                            ctxBuilder.Append(detailRow.Columns[k].ToNodePlainString());
                            ctxBuilder.Append(k % 2 == 1 ? "\r\n" : ":");
                        }
                    }
                    string ctx = ctxBuilder.ToString();

                    string CorpCode     = ctx.GetRegex("组织机构代码,机构代码");
                    string BusinessCode = ctx.GetRegex("营业执照注册号");
                    string BusinessType = ctx.GetRegex("注册经济类别");
                    string RegFund      = ctx.GetRegex("注册资本(万元),注册资本,注册资金", false).Replace("(万元)", "").Replace("(万)", "").Replace("万元", "").Replace("万", "");
                    string RegDate      = ctx.GetRegex("成立日期,成立时间,设立日期,设立时间");
                    string CorpAddress  = ctx.GetRegex("注册地址");
                    string LinkMan      = ctx.GetRegex("法定代表人,联系人");
                    string LinkPhone    = ctx.GetRegex("联系电话");
                    string Fax          = ctx.GetRegex("传真");
                    string Email        = ctx.GetRegex("电子邮箱");
                    string CorpSite     = ctx.GetRegex("企业网址");

                    // These fields are not available from this source and are stored empty.
                    string ISOQualNum = string.Empty, ISOEnvironNum = string.Empty;

                    if (RegDate.Contains("000"))
                    {
                        RegDate = "";
                    }
                    // Every "万" was stripped by the Replace chain above, so the unit is put back here.
                    if (!RegFund.Contains("万"))
                    {
                        RegFund += "万";
                    }

                    CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, CorpType, "广东省", "佛山市", "佛山市住房和城乡建设管理局", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);

                    // The name comes from scraped HTML, so quotes are doubled before it is placed in a SQL literal.
                    string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and InfoSource='{1}'", EscapeSqlLiteral(info.CorpName), EscapeSqlLiteral(info.InfoSource));
                    object obj    = ToolDb.ExecuteScalar(strSql);
                    if (obj != null && obj.ToString() != "")
                    {
                        // Remove the dependent rows first, then the company row itself.
                        foreach (string childTable in childTables)
                        {
                            ToolDb.ExecuteSql(string.Format("delete from {0} where CorpId='{1}'", childTable, obj));
                        }
                        ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", obj));
                    }

                    if (!ToolDb.SaveEntity(info, string.Empty))
                    {
                        continue;
                    }

                    TableTag qualTable = FindFirstTableInDiv(htldtl, "tabs1-1");
                    if (qualTable != null)
                    {
                        AddQual(qualTable, info.Id, info.Url);
                    }

                    TableTag secLicTable = FindFirstTableInDiv(htldtl, "tabs5-1");
                    if (secLicTable != null)
                    {
                        AddCorpSecLic(secLicTable, info.Id, info.Url);
                    }

                    TableTag resultsTable = FindFirstTableInDiv(htldtl, "tabs2-1");
                    if (resultsTable != null)
                    {
                        AddCorpResults(resultsTable, info.Id, info.Url);
                    }

                    TableTag punishTable = FindFirstTableInDiv(htldtl, "tabs7-1");
                    if (punishTable != null)
                    {
                        AddCorpPunish(punishTable, info.Id, info.Url);
                    }
                }
            }
            ToolCoreDb.ExecuteProcedure();
            return(null);
        }

        /// <summary>
        /// Parses <paramref name="html"/> with a fresh parser and returns the first <c>table</c> node matched
        /// under the <c>div</c> whose id is <paramref name="divId"/>, or null when there is no such table.
        /// </summary>
        private static TableTag FindFirstTableInDiv(string html, string divId)
        {
            Parser   parser = new Parser(new Lexer(html));
            NodeList tables = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", divId)), true), new TagNameFilter("table")));
            if (tables == null || tables.Count == 0)
            {
                return null;
            }
            return tables[0] as TableTag;
        }

        /// <summary>
        /// Doubles single quotes so that <paramref name="value"/> can be embedded in a quoted SQL string literal.
        /// A null value is treated as an empty string.
        /// </summary>
        private static string EscapeSqlLiteral(string value)
        {
            return (value ?? string.Empty).Replace("'", "''");
        }