示例#1
0
        /// <summary>
        /// Downloads the page at <paramref name="infoUrl"/>, takes the first <c>table</c> element found in it
        /// and saves one technical-staff entity per data row through <c>ToolDb</c>.
        /// </summary>
        /// <param name="info">Company record; its <c>Id</c> and <c>Url</c> are passed to every generated entity.</param>
        /// <param name="infoUrl">Address of the page to download.</param>
        protected void AddCorpTecStaff(CorpInfo info, string infoUrl)
        {
            // The highest column index read below is 5.
            const int requiredColumns = 6;

            string htmldtl;
            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
            }
            catch
            {
                // Best effort: a failed download means there is nothing to import.
                return;
            }
            if (string.IsNullOrEmpty(htmldtl))
            {
                return;
            }

            Parser   parser   = new Parser(new Lexer(htmldtl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (nodeList == null || nodeList.Count == 0)
            {
                return;
            }

            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // Rows 0 and 1 are not read; data rows start at index 2.
            for (int i = 2; i < table.RowCount; i++)
            {
                TableRow tr = table.Rows[i];
                if (tr.ColumnCount <= 1)
                {
                    // Stop at the first row that has at most one cell.
                    break;
                }
                if (tr.ColumnCount < requiredColumns)
                {
                    // Too few cells to index safely; skip the row instead of failing the whole import.
                    continue;
                }

                string staffName = tr.Columns[1].ToNodePlainString();
                string idCard    = tr.Columns[2].ToNodePlainString();
                string certLevel = tr.Columns[3].ToNodePlainString();
                string certNo    = tr.Columns[4].ToNodePlainString();
                string staffType = tr.Columns[5].ToNodePlainString();

                CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, staffName, idCard, certLevel, certNo, info.Url, staffType);
                ToolDb.SaveEntity(staff, string.Empty);
            }
        }
示例#2
0
        /// <summary>
        /// Returns the province list. The list is read from the file at <c>ToolFile.WebCityPath</c>; when that
        /// yields nothing, it is downloaded as JSON, sorted by <c>RegionName</c> and written back to the file.
        /// </summary>
        /// <returns>
        /// The cached or freshly downloaded list. An empty list is returned when the download fails or the
        /// payload does not contain <c>json.category.provinces</c> as an array.
        /// </returns>
        /// <remarks>
        /// A payload that is not valid JSON still surfaces the serializer's exception, as before.
        /// </remarks>
        protected List <ProvInfo> GetCity()
        {
            List<ProvInfo> citys = ToolFile.Deserialize<ProvInfo>(ToolFile.WebCityPath);
            if (citys != null && citys.Count > 0)
            {
                return(citys);
            }

            citys = new List<ProvInfo>();

            string html;
            try
            {
                html = ToolWeb.GetHtmlByUrl("http://jzsc.mohurd.gov.cn/asite/region/index");
            }
            catch
            {
                // Best effort: no cache and no network means no provinces this time.
                return(citys);
            }
            if (string.IsNullOrEmpty(html))
            {
                return(citys);
            }

            JavaScriptSerializer        serializer = new JavaScriptSerializer();
            Dictionary<string, object> node       = serializer.DeserializeObject(html) as Dictionary<string, object>;

            // Walk json -> category without assuming either level is present.
            object child;
            foreach (string key in new string[] { "json", "category" })
            {
                if (node == null || !node.TryGetValue(key, out child))
                {
                    return(citys);
                }
                node = child as Dictionary<string, object>;
            }
            if (node == null || !node.TryGetValue("provinces", out child))
            {
                return(citys);
            }

            object[] provinces = child as object[];
            if (provinces == null)
            {
                return(citys);
            }

            foreach (object entry in provinces)
            {
                Dictionary<string, object> dic = entry as Dictionary<string, object>;
                if (dic == null)
                {
                    continue;
                }

                ProvInfo info = new ProvInfo();
                info.RegionId       = Convert.ToString(dic["region_id"]);
                info.RegionName     = Convert.ToString(dic["region_name"]);
                info.RegionFullName = Convert.ToString(dic["region_fullname"]);
                citys.Add(info);
            }

            citys = citys.OrderBy(x => x.RegionName).ToList();
            ToolFile.Serialize<ProvInfo>(citys, ToolFile.WebCityPath);
            return(citys);
        }
示例#3
0
        /// <summary>
        /// Reads the <c>list_table</c> table on <c>SiteUrl</c>, downloads the detail page linked from each row
        /// and saves one <c>CorpBehavior</c> entity per row whose detail page contains the <c>Table1</c> table.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>
        /// The list created at the start of the call; nothing is ever added to it because entities are saved
        /// directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list     = new ArrayList();
            int   sqlCount = 0;

            string htl;
            try
            {
                htl = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "list_table")));
            if (nodeList == null || nodeList.Count == 0)
            {
                return(list);
            }

            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                return(list);
            }

            // Row 0 is not read; data rows start at index 1.
            for (int j = 1; j < table.RowCount; j++)
            {
                // NOTE(review): the original test was "sqlCount <= MaxCount", which lets MaxCount + 1
                // entities through. Kept as is; confirm whether the cap is meant to be inclusive.
                if (sqlCount > this.MaxCount)
                {
                    // Cap reached: stop before downloading another detail page.
                    return(list);
                }

                TableRow tr = table.Rows[j];
                if (tr.ColumnCount < 5)
                {
                    // Columns 1..4 are read below.
                    continue;
                }

                NodeList links = tr.Columns[3].SearchFor(typeof(ATag), true);
                ATag     aTag  = (links != null && links.Count > 0) ? links[0] as ATag : null;
                if (aTag == null || aTag.Link == null)
                {
                    // Without the link there is no document id to build the detail address from.
                    continue;
                }

                string corpName  = tr.Columns[1].ToPlainTextString().Trim();
                string corpType  = tr.Columns[2].ToPlainTextString().Trim();
                string behavior  = tr.Columns[3].ToPlainTextString().Trim();
                string beginDate = tr.Columns[4].ToPlainTextString().Trim();
                string infoUrl   = "http://61.144.226.2/CXDA_BLXW/Detail.aspx?Doc_ID=" + aTag.Link.Replace("GoAttachView('", "").Replace("');", "").Trim();

                string htmldetail;
                try
                {
                    htmldetail = ToolWeb.GetHtmlByUrl(ToolWeb.UrlEncode(infoUrl), Encoding.GetEncoding("GB2312")).Replace("= 602;", "罚");
                }
                catch (Exception)
                {
                    continue;
                }

                Parser   dtlparser = new Parser(new Lexer(htmldetail));
                NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "Table1"), new TagNameFilter("table")));
                if (dtnode == null || dtnode.Count == 0)
                {
                    continue;
                }

                string       behaviorCtx = dtnode.AsString().Replace("\t", "").Replace("&nbsp;", "").Replace("\r\n", "").Trim();
                CorpBehavior info        = ToolDb.GenCorpBehavior(corpName, corpType, behavior, behaviorCtx, infoUrl, string.Empty, string.Empty, beginDate);
                ToolDb.SaveEntity(info, this.ExistCompareFields);
                sqlCount++;
            }
            return(list);
        }
示例#4
0
        /// <summary>
        /// Requests the local login address, reads the <c>mrsa</c>, <c>ersa</c> and <c>auths_Token</c> values
        /// from the JSON it returns, then calls the same address again with those values plus an encrypted
        /// user name and an encrypted MD5 password digest. The second response is discarded.
        /// </summary>
        protected void SetTemp()
        {
            string         encryptKey = "KdNszj.Bdimp.WebApi.AzdgKEY";
            KdAzdgHelper   cipher     = new KdAzdgHelper(encryptKey);
            string         cookies    = string.Empty;
            // Never used afterwards; still constructed because the constructor's side effects are not visible here.
            IWebHttpClient client     = new WebHttpClient();
            Uri            loginUri   = new Uri("http://localhost:7434/Home/Login");

            // First round trip: the cookie string is shared with the second call through the ref argument.
            string responseJson = ToolWeb.GetHtmlByUrl(loginUri.ToString(), Encoding.UTF8, ref cookies);
            Dictionary<string, string> fields = JsonConvert.DeserializeObject<Dictionary<string, string>>(responseJson);

            string mrsaValue  = fields["mrsa"];
            string ersaValue  = fields["ersa"];
            string tokenValue = fields["auths_Token"];

            string encryptedName     = cipher.Encrypt("adminer");
            string passwordDigest    = DESEncrypt.GenerateMD5("1");
            string encryptedPassword = cipher.Encrypt(passwordDigest);

            string[] formKeys   = new string[] { "userName", "userPwd", "mrsa", "ersa", "auths_Token" };
            string[] formValues = new string[] { encryptedName, encryptedPassword, mrsaValue, ersaValue, tokenValue };
            NameValueCollection form = ToolWeb.GetNameValueCollection(formKeys, formValues);

            // Second round trip: submits the form values.
            ToolWeb.GetHtmlByUrl(loginUri.ToString(), form, Encoding.UTF8, ref cookies);
        }
示例#5
0
        /// <summary>
        /// Downloads the page at <paramref name="infoUrl"/>, takes the first <c>table</c> element found in it
        /// and saves one qualification entity per data row through <c>ToolDb</c>.
        /// </summary>
        /// <param name="info">Company record; its <c>Id</c>, <c>Url</c>, <c>Province</c> and <c>City</c> are passed to every generated entity.</param>
        /// <param name="infoUrl">Address of the page to download.</param>
        protected void AddCorpQual(CorpInfo info, string infoUrl)
        {
            // The highest column index read below is 6.
            const int requiredColumns = 7;

            string htmldtl;
            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
            }
            catch
            {
                // Best effort: a failed download means there is nothing to import.
                return;
            }
            if (string.IsNullOrEmpty(htmldtl))
            {
                return;
            }

            Parser   parser   = new Parser(new Lexer(htmldtl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (nodeList == null || nodeList.Count == 0)
            {
                return;
            }

            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // Row 0 is not read; data rows start at index 1.
            for (int i = 1; i < table.RowCount; i++)
            {
                TableRow tr = table.Rows[i];
                if (tr.ColumnCount <= 1)
                {
                    // Stop at the first row that has at most one cell.
                    break;
                }
                if (tr.ColumnCount < requiredColumns)
                {
                    // Too few cells to index safely; skip the row instead of failing the whole import.
                    continue;
                }

                string qualType  = tr.Columns[1].ToNodePlainString();
                string qualCode  = tr.Columns[2].ToNodePlainString();
                string name      = tr.Columns[3].ToNodePlainString();
                string licDate   = tr.Columns[4].ToPlainTextString().GetDateRegex();
                string validDate = tr.Columns[5].ToPlainTextString().GetDateRegex();
                string licUnit   = tr.Columns[6].ToNodePlainString();

                // Column 3 carries the qualification name followed by its level; split the two apart.
                string qualName  = string.Empty;
                string qualLevel = string.Empty;
                if (name.Contains("不分"))
                {
                    qualName  = name.Remove(name.IndexOf("不分"));
                    qualLevel = "不分级";
                }
                else if (name.Contains("暂定级"))
                {
                    // NOTE(review): this branch also yields "不分级"; confirm it is intended and not a copy of the branch above.
                    qualName  = name.Remove(name.IndexOf("暂定级"));
                    qualLevel = "不分级";
                }
                else if (!string.IsNullOrWhiteSpace(name) && name.Length > 2)
                {
                    // Otherwise the last two characters are taken as the level.
                    qualLevel = name.Substring(name.Length - 2, 2);
                    qualName  = name.Remove(name.Length - 2, 2);
                }

                if (qualType.Contains("监理"))
                {
                    qualName = qualName + "监理";
                }
                string qualNum = qualLevel.GetLevel();

                // The sequence argument is always passed empty.
                CorpQual qual = ToolDb.GenCorpQual(info.Id, qualName, qualCode, string.Empty, qualType, qualLevel, validDate, licDate, licUnit, info.Url, qualNum, info.Province, info.City);
                ToolDb.SaveEntity(qual, string.Empty);
            }
        }
示例#6
0
        /// <summary>
        /// Requests the staff list of the enterprise identified by <paramref name="enterpriseId"/>, then for
        /// each listed person requests that person's certificate list and saves one technical-staff entity
        /// carrying the name plus the code and major of the first certificate, when there is one.
        /// </summary>
        /// <param name="info">Company record; its <c>Id</c> and <c>Url</c> are passed to every generated entity.</param>
        /// <param name="enterpriseId">Value inserted into the <c>enterpriseId</c> query parameter.</param>
        /// <remarks>
        /// The import is best effort: a failed or unreadable staff list ends the call, and a failed or
        /// unreadable certificate response skips only that person.
        /// </remarks>
        protected void AddCorpStaff(CorpInfo info, string enterpriseId)
        {
            string url = "http://202.104.65.182:8081/G2/webdrive/web-person-info.do?enterpriseId=" + enterpriseId + "&enterpriseBaseId=&data&filter_params_=rowNum,personId,personBaseId,name,isPause,isDel&defined_operations_=&nocheck_operations_=&";

            string gridSearch       = "false";
            string nd               = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
            string PAGESIZE         = "1000";
            string PAGE             = "1";
            string sortField        = "";
            string sortDirection    = "asc";
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection"
            }, new string[] {
                gridSearch, nd, PAGESIZE, PAGE, sortField, sortDirection
            });

            JavaScriptSerializer        serializer = new JavaScriptSerializer();
            Dictionary<string, object> listing;
            try
            {
                listing = serializer.DeserializeObject(ToolWeb.GetHtmlByUrl(url, nvc)) as Dictionary<string, object>;
            }
            catch { return; }

            object data;
            if (listing == null || !listing.TryGetValue("data", out data))
            {
                return;
            }
            object[] objList = data as object[];
            if (objList == null)
            {
                return;
            }

            foreach (object obj in objList)
            {
                Dictionary<string, object> dic = obj as Dictionary<string, object>;
                object personId;
                if (dic == null || !dic.TryGetValue("personId", out personId))
                {
                    // Without a person id the certificate lookup below cannot be addressed.
                    continue;
                }

                object rawName;
                string staffName = dic.TryGetValue("name", out rawName) ? Convert.ToString(rawName) : string.Empty;

                string dtlUrl = "http://202.104.65.182:8081/G2/webdrive/web-person-certificate.do?personId=" + personId + "&actionFlag=view&data&filter_params_=rowNum,personBaseId,personId,certificateId,certificateType,registerLevel,certificateCode,certificatePhotoetch,gardenMajor,issuer,major,pmTitle,issueDate,registerValidEnd&defined_operations_=&nocheck_operations_=view&";

                Dictionary<string, object> dtlDic;
                try
                {
                    // The same form values are sent with the certificate request.
                    dtlDic = serializer.DeserializeObject(ToolWeb.GetHtmlByUrl(dtlUrl, nvc)) as Dictionary<string, object>;
                }
                catch { continue; }

                string certNo    = string.Empty;
                string staffType = string.Empty;

                object   dtlData    = null;
                object[] dtlObjList = null;
                if (dtlDic != null && dtlDic.TryGetValue("data", out dtlData))
                {
                    dtlObjList = dtlData as object[];
                }
                if (dtlObjList != null && dtlObjList.Length > 0)
                {
                    // Only the first certificate is used.
                    Dictionary<string, object> dicDtl = dtlObjList[0] as Dictionary<string, object>;
                    if (dicDtl != null)
                    {
                        object field;
                        if (dicDtl.TryGetValue("certificateCode", out field))
                        {
                            certNo = Convert.ToString(field);
                        }
                        if (dicDtl.TryGetValue("major", out field))
                        {
                            staffType = Convert.ToString(field);
                        }
                    }
                }

                // The id-card and certificate-level arguments are always passed empty.
                CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, staffName, string.Empty, string.Empty, certNo, info.Url, staffType);
                ToolDb.SaveEntity(staff, string.Empty);
            }
        }
示例#7
0
        /// <summary>
        /// Reads the <c>GridView1</c> table on <c>SiteUrl</c>, triggers the post-back linked from column 8 of
        /// each row and hands the returned page to <c>Save</c> for sections 1 to 14, leaving out section 7.
        /// Afterwards, when <c>sqlcount</c> exceeds 100, rows of <c>CorpCreditjd</c> created before today are
        /// marked as not new.
        /// </summary>
        /// <param name="crawlAll">Forwarded unchanged to <c>Save</c>.</param>
        /// <returns>The list instance that was passed to every <c>Save</c> call.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new ArrayList();
            string html = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "GridView1"), new TagNameFilter("table")));

            TableTag table = (nodeList != null && nodeList.Count > 0) ? nodeList[0] as TableTag : null;
            if (table != null)
            {
                // Row 0 is not read; data rows start at index 1.
                for (int i = 1; i < table.RowCount; i++)
                {
                    Winista.Text.HtmlParser.Tags.TableRow tr = table.Rows[i];
                    if (tr.ColumnCount < 9)
                    {
                        // Column 8 holds the post-back link read below.
                        continue;
                    }

                    NodeList links = tr.Columns[8].SearchFor(typeof(ATag), true);
                    ATag     alink = (links != null && links.Count > 0) ? links[0] as ATag : null;
                    if (alink == null || alink.Link == null)
                    {
                        continue;
                    }

                    string view = ToolWeb.GetAspNetViewState(html);
                    string even = ToolWeb.GetAspNetEventValidation(html);
                    string alin = alink.Link.Replace("__doPostBack('", "").Replace("','')", "");
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "GridViewPaging1$txtGridViewPagingForwardTo", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION" },
                                                                             new string[] { alin, "", view, "1", "", even });
                    string cookies = string.Empty;
                    string bidhtml;
                    try
                    {
                        bidhtml = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookies);
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex.ToString());
                        // Skip the row: falling through would pass the previous row's page to Save again.
                        continue;
                    }

                    for (int l = 1; l <= 14; l++)
                    {
                        if (l == 7)
                        {
                            continue;
                        }
                        Save(l, bidhtml, list, crawlAll);
                    }
                }
            }
            if (sqlcount > 100)
            {
                string sql = string.Format("update CorpCreditjd set IsNew='0' where CreateTime<'{0}'", DateTime.Now.ToString("yyyy-MM-dd"));
                ToolDb.ExecuteSql(sql);
            }
            return(list);
        }
示例#8
0
        /// <summary>
        /// Reads the <c>dgConstBid</c> table from <paramref name="html"/> and, for every six-column row,
        /// downloads the person's detail page and saves one <c>CorpStaff</c> entity.
        /// </summary>
        /// <param name="url">Not read by this implementation.</param>
        /// <param name="list">Not read by this implementation; entities are saved directly.</param>
        /// <param name="html">Markup of the listing page to parse.</param>
        /// <param name="crawlAll">Not read by this implementation.</param>
        private void GetCorpStaffSzjsjMethod(string url, IList list, string html, bool crawlAll)
        {
            Parser   parser = new Parser(new Lexer(html));
            NodeList aNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid")));

            if (aNodes == null || aNodes.Count != 1 || !(aNodes[0] is TableTag))
            {
                return;
            }

            TableTag table = (TableTag)aNodes[0];
            // Row 0 is not read; data rows start at index 1.
            for (int i = 1; i < table.Rows.Length; i++)
            {
                TableRow row = table.Rows[i];
                if (row.Columns.Length != 6)
                {
                    continue;
                }

                // The link in column 1 carries the identifier used to address the detail page.
                NodeList cellChildren = row.Columns[1].Children;
                NodeList links        = cellChildren != null ? cellChildren.SearchFor(typeof(ATag), true) : null;
                ATag     link         = (links != null && links.Count > 0) ? links[0] as ATag : null;
                if (link == null || link.Link == null)
                {
                    continue;
                }

                string Name       = row.Columns[1].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string idnum      = link.Link.Replace("GoDetail('", "").Replace("');", "");
                // Cleaned copy of the identifier; only the error log below uses it.
                string IdNum      = idnum.Replace("&am", "").Replace("&a", "").Replace("p;c", "").Replace("cate", "").Replace("cat", "").Replace("ate", "");
                string CorpName   = row.Columns[2].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string PersonType = row.Columns[3].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string CertCode   = row.Columns[4].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string Profession = row.Columns[5].ToPlainTextString().Trim().Replace("&nbsp;", "");
                string Url        = "http://61.144.226.2/ryxx/Detail_LWDZ.aspx?ID_NUMBER=" + idnum;

                string ctxhtml;
                try
                {
                    ctxhtml = ToolWeb.GetHtmlByUrl(Url, Encoding.Default);
                }
                catch (Exception ex)
                {
                    // The first label is the person's name, so log Name (the original logged CorpName twice).
                    Logger.Error("人员姓名:" + Name + ",证件号:" + IdNum + "所在单位:" + CorpName + "," + Url + ";" + ex);
                    continue;
                }

                Parser   parserCtx = new Parser(new Lexer(ctxhtml));
                NodeList ctxNode   = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("borderColor", "#cccccc")));
                string   text      = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("TD"), new HasAttributeFilter("width", "76%")), true).AsString().Replace("&nbsp;", "");

                // The company code falls back to the company name; the last match on the detail page wins.
                string CorpCode = CorpName;
                foreach (Match m in Regex.Matches(text, "任职企业编号:.*?\r\n"))
                {
                    CorpCode = m.ToString().Replace("任职企业编号:", "").Replace("\r\n", "");
                }

                // These fields are not read from either page and are passed empty.
                string Sex = string.Empty, CredType = string.Empty, RegLevel = string.Empty, RegCode = string.Empty, AuthorUnit = string.Empty, CertGrade = string.Empty;

                CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, string.Empty, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "深圳市区", "深圳市住房和建设局", Url, Profession, "", "", "", "");
                ToolDb.SaveEntity(corpStaff, this.ExistCompareFields);
            }
        }
示例#9
0
        /// <summary>
        /// Requests the leader list of the enterprise identified by <paramref name="enterpriseId"/> and saves
        /// one <c>CorpLeader</c> entity per entry, using the entry's <c>name</c> and <c>title</c> values.
        /// </summary>
        /// <param name="info">Company record; its <c>Id</c> and <c>Url</c> are passed to every generated entity.</param>
        /// <param name="enterpriseId">Value inserted into the <c>enterpriseId</c> query parameter.</param>
        /// <remarks>
        /// The import is best effort: a failed request or a response without a <c>data</c> array ends the
        /// call without saving anything.
        /// </remarks>
        protected void AddCorpLeader(CorpInfo info, string enterpriseId)
        {
            string url = "http://202.104.65.182:8081/G2/webdrive/web-enterprise-leader.do?enterpriseId=" + enterpriseId + "&data&filter_params_=rowNum,leaderId,name,title,safetyLicenseCode,safetyLicenseIssuer,safetyLicenseValidEnd&defined_operations_=&nocheck_operations_=&";

            string gridSearch       = "false";
            string nd               = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
            string PAGESIZE         = "1000";
            string PAGE             = "1";
            string sortField        = "";
            string sortDirection    = "asc";
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection"
            }, new string[] {
                gridSearch, nd, PAGESIZE, PAGE, sortField, sortDirection
            });

            JavaScriptSerializer        serializer = new JavaScriptSerializer();
            Dictionary<string, object> listing;
            try
            {
                listing = serializer.DeserializeObject(ToolWeb.GetHtmlByUrl(url, nvc)) as Dictionary<string, object>;
            }
            catch { return; }

            object data;
            if (listing == null || !listing.TryGetValue("data", out data))
            {
                return;
            }
            object[] objList = data as object[];
            if (objList == null)
            {
                return;
            }

            foreach (object obj in objList)
            {
                Dictionary<string, object> dic = obj as Dictionary<string, object>;
                if (dic == null)
                {
                    continue;
                }

                object field;
                string leaderName = dic.TryGetValue("name", out field) ? Convert.ToString(field) : string.Empty;
                string leaderType = dic.TryGetValue("title", out field) ? Convert.ToString(field) : string.Empty;

                // The duty and phone arguments are always passed empty.
                CorpLeader corpLeader = ToolDb.GenCorpLeader(info.Id, leaderName, string.Empty, leaderType, info.Url, string.Empty);
                ToolDb.SaveEntity(corpLeader, string.Empty);
            }
        }
示例#10
0
        /// <summary>
        /// Downloads the page at <paramref name="infoUrl"/>, takes the first <c>table</c> element found in it
        /// and saves one <c>CorpPrompt</c> entity per data row through <c>ToolDb</c>.
        /// </summary>
        /// <param name="info">Company record; its <c>Province</c>, <c>City</c> and <c>Id</c> are passed to every generated entity.</param>
        /// <param name="infoUrl">Address of the page to download; also stored on every generated entity.</param>
        protected void AddCorpPrompt(CorpInfo info, string infoUrl)
        {
            // The highest column index read below is 4.
            const int requiredColumns = 5;

            string htmldtl;
            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
            }
            catch
            {
                // Best effort: a failed download means there is nothing to import.
                return;
            }
            if (string.IsNullOrEmpty(htmldtl))
            {
                return;
            }

            Parser   parser   = new Parser(new Lexer(htmldtl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (nodeList == null || nodeList.Count == 0)
            {
                return;
            }

            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // Row 0 is not read; data rows start at index 1.
            for (int i = 1; i < table.RowCount; i++)
            {
                TableRow tr = table.Rows[i];
                if (tr.ColumnCount <= 1)
                {
                    // Stop at the first row that has at most one cell.
                    break;
                }
                if (tr.ColumnCount < requiredColumns)
                {
                    // Too few cells to index safely; skip the row instead of failing the whole import.
                    continue;
                }

                string recordCode = tr.Columns[0].ToNodePlainString();
                string recordName = tr.Columns[1].ToNodePlainString();
                string recordInfo = tr.Columns[2].ToNodePlainString();
                string implUnit   = tr.Columns[3].ToNodePlainString();
                string beginDate  = tr.Columns[4].ToPlainTextString().GetDateRegex();
                // Every record is saved with the good-record flag off.
                bool   isGood     = false;

                CorpPrompt corp = ToolDb.GetCorpPrompt(info.Province, info.City, "", info.Id, recordCode, recordName, recordInfo, implUnit, beginDate, isGood, infoUrl);

                ToolDb.SaveEntity(corp, string.Empty);
            }
        }
示例#11
0
        /// <summary>
        /// Downloads the page at <paramref name="infoUrl"/>, takes the first <c>table</c> element found in it
        /// and saves one <c>CorpResults</c> entity per data row through <c>ToolDb</c>.
        /// </summary>
        /// <param name="info">Company record; its <c>Id</c> and <c>Url</c> are passed to every generated entity.</param>
        /// <param name="infoUrl">Address of the page to download.</param>
        protected void AddCorpResults(CorpInfo info, string infoUrl)
        {
            // The highest column index read below is 5.
            const int requiredColumns = 6;

            string htmldtl;
            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
            }
            catch
            {
                // Best effort: a failed download means there is nothing to import.
                return;
            }
            if (string.IsNullOrEmpty(htmldtl))
            {
                return;
            }

            Parser   parser   = new Parser(new Lexer(htmldtl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (nodeList == null || nodeList.Count == 0)
            {
                return;
            }

            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // Row 0 is not read; data rows start at index 1.
            for (int i = 1; i < table.RowCount; i++)
            {
                TableRow tr = table.Rows[i];
                if (tr.ColumnCount <= 1)
                {
                    // Stop at the first row that has at most one cell.
                    break;
                }
                if (tr.ColumnCount < requiredColumns)
                {
                    // Too few cells to index safely; skip the row instead of failing the whole import.
                    continue;
                }

                string prjCode    = tr.Columns[1].ToNodePlainString();
                string prjName    = tr.Columns[2].ToNodePlainString();
                string prjAddress = tr.Columns[3].ToNodePlainString();
                string prjClass   = tr.Columns[4].ToNodePlainString();
                string buildUnit  = tr.Columns[5].ToNodePlainString();

                // These fields are not read from the table and are passed empty.
                string grantDate = string.Empty, chargeDept = string.Empty, prjClassLevel = string.Empty,
                       buildArea = string.Empty, inviteArea = string.Empty, prospUnit = string.Empty,
                       designUnit = string.Empty, superUnit = string.Empty, constUnit = string.Empty,
                       prjStartDate = string.Empty, prjEndDate = string.Empty;

                CorpResults result = ToolDb.GenCorpResults(info.Id, prjName, prjCode, buildUnit, grantDate, prjAddress, chargeDept, prjClassLevel, prjClass, buildArea, inviteArea, prospUnit, designUnit, superUnit, constUnit, prjStartDate, prjEndDate, info.Url);

                ToolDb.SaveEntity(result, string.Empty);
            }
        }
示例#12
0
文件: Warning.cs 项目: SHNXJMG/Small
        /// <summary>
        /// Reads the <c>list_table</c> table on <c>SiteUrl</c>, downloads the detail page linked from each row
        /// and saves one <c>CorpWarning</c> entity per row whose detail page contains the <c>Table8</c> table.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>
        /// The list created at the start of the call; nothing is ever added to it because entities are saved
        /// directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list     = new ArrayList();
            int   sqlCount = 0;

            string htl;
            try
            {
                htl = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "list_table")));
            if (nodeList == null || nodeList.Count == 0)
            {
                return(list);
            }

            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                return(list);
            }

            // Built once instead of once per row.
            Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");

            // Row 0 is not read; data rows start at index 1.
            for (int j = 1; j < table.RowCount; j++)
            {
                // NOTE(review): the original test was "sqlCount <= MaxCount", which lets MaxCount + 1
                // entities through. Kept as is; confirm whether the cap is meant to be inclusive.
                if (sqlCount > this.MaxCount)
                {
                    // Cap reached: stop before downloading another detail page.
                    return(list);
                }

                TableRow tr = table.Rows[j];
                if (tr.ColumnCount < 4)
                {
                    // Columns 1..3 are read below.
                    continue;
                }

                NodeList links  = tr.Columns[2].SearchFor(typeof(ATag), true);
                NodeList images = tr.Columns[1].SearchFor(typeof(ImageTag), true);
                ATag     aTag   = (links != null && links.Count > 0) ? links[0] as ATag : null;
                ImageTag img    = (images != null && images.Count > 0) ? images[0] as ImageTag : null;
                if (aTag == null || aTag.Link == null || img == null || img.ImageURL == null)
                {
                    // The row lacks the link or the image the fields below are derived from.
                    continue;
                }

                string WarningName = tr.Columns[2].ToPlainTextString().Trim();
                string DateStage   = "1"; // 0 stands for half a year
                string Color       = img.ImageURL.Contains("yellow") ? "1" : "0"; // 0 stands for red, 1 for yellow
                string PrjName     = tr.Columns[3].ToPlainTextString().Trim();
                string UrlInfo     = "http://61.144.226.2/jsxx/zjjsdetail.aspx?ID=" + aTag.Link.Replace("GoView(", "").Replace(");", "").Trim();

                string htmldetail;
                try
                {
                    htmldetail = ToolWeb.GetHtmlByUrl(ToolWeb.UrlEncode(UrlInfo), Encoding.GetEncoding("GB2312")).Replace("= 602;", "罚");
                }
                catch (Exception)
                {
                    continue;
                }

                Parser   dtlparser = new Parser(new Lexer(htmldetail));
                NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "Table8"), new TagNameFilter("table")));
                if (dtnode == null || dtnode.Count == 0)
                {
                    continue;
                }

                string WarnCtx = dtnode.AsString().Replace("\t", "").Replace("&nbsp;", "").Replace("\r\n", "").Trim();

                // Declared per row so a page without the label cannot inherit the previous row's date.
                string Begindate = string.Empty;
                if (WarnCtx.Contains("警示开始日期"))
                {
                    Begindate = WarnCtx.Substring(WarnCtx.IndexOf("警示开始日期")).Replace("警示开始日期:", "").Trim();
                }
                Begindate = regDate.Match(Begindate).Value.Trim();

                // NOTE(review): the entity is saved as constructed; none of the values parsed above
                // (WarningName, Color, Begindate, PrjName, DateStage, UrlInfo, WarnCtx) is copied onto it.
                // The mapping is not visible from this method; confirm and populate the entity.
                CorpWarning info = new CorpWarning();
                ToolDb.SaveEntity(info, this.ExistCompareFields);
                sqlCount++;
            }
            return(list);
        }
示例#13
0
        /// <summary>
        /// Crawls the paged enterprise list at <c>SiteUrl</c>, reads each enterprise's detail page and
        /// re-creates its <c>CorpInfo</c> row together with the related child records.
        /// </summary>
        /// <remarks>
        /// When a row with the same name, type and source already exists, its child rows and the row
        /// itself are deleted first. A child category is only re-added when its delete statement did not
        /// return -1 (presumably the failure code of <c>ToolCoreDb.ExecuteSql</c> - confirm).
        /// </remarks>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>Always <c>null</c>; everything is written straight to the database.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    count   = 1;
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // The first list page is required; give up when it cannot be fetched.
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is taken from the "page=" query value of the link with id "lx".
                    string temp = pageNode.GetATagHref().GetRegexBegEnd("page=", "&");
                    pageInt = int.Parse(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + i.ToString(), Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }
                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                // Row 0 is skipped (the loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    // Cells 1..3 are read below; a shorter row would otherwise throw and abort the whole crawl.
                    if (tr.ColumnCount < 4)
                    {
                        continue;
                    }

                    string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty,
                           RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty,
                           BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty,
                           Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty,
                           ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;

                    CorpName = tr.Columns[1].ToNodePlainString();
                    CorpCode = tr.Columns[2].ToNodePlainString();
                    LinkMan  = tr.Columns[3].ToNodePlainString();
                    string href    = tr.Columns[1].GetATagValue("onclick");
                    string htmldtl = string.Empty;
                    try
                    {
                        // The onclick value has the form doView('a','b'); the two arguments feed the detail URL.
                        string   temp = href.Replace("doView", "").Replace("(", "").Replace(")", "").Replace("'", "");
                        string[] url  = temp.Split(',');
                        cUrl    = "http://61.144.226.2:8001/web/enterprs/unitInfoAction.do?method=toView&qybh=" + url[0] + "&certType=1&orgcode=" + url[1];
                        htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }

                    // Turn header cells into ordinary cells so they show up as table columns. Only the tag
                    // names are rewritten: a plain Replace("th", "td") also mangles cell text containing "th".
                    string cellHtml = System.Text.RegularExpressions.Regex.Replace(
                        htmldtl, @"(</?)th\b", "$1td", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                    parser = new Parser(new Lexer(cellHtml));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "infoTableL")));
                    TableTag tabledtl = (dtlNode != null && dtlNode.Count > 0) ? dtlNode[0] as TableTag : null;
                    if (tabledtl != null)
                    {
                        // Flatten the detail table into text: first cell of a row + ":", every other cell + line break.
                        StringBuilder ctxBuilder = new StringBuilder();
                        for (int d = 0; d < tabledtl.RowCount; d++)
                        {
                            for (int k = 0; k < tabledtl.Rows[d].ColumnCount; k++)
                            {
                                ctxBuilder.Append(tabledtl.Rows[d].Columns[k].ToNodePlainString());
                                ctxBuilder.Append(k == 0 ? ":" : "\r\n");
                            }
                        }
                        string ctx = ctxBuilder.ToString();
                        LinkPhone   = ctx.GetRegex("联系电话");
                        Fax         = ctx.GetRegex("传真");
                        Email       = ctx.GetRegex("电子邮箱");
                        CorpAddress = ctx.GetRegex("注册地址");
                        RegFund     = ctx.GetRegex("注册资金");
                        RegDate     = ctx.GetRegex("设立时间");
                    }

                    CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, "设计与施工一体化企业", "广东省", "深圳市", "深圳市住房和建设局", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

                    // CorpName comes from scraped HTML: double any single quote so it cannot break out of the SQL literal.
                    string safeCorpName = (info.CorpName ?? string.Empty).Replace("'", "''");
                    object obj = ToolDb.ExecuteScalar(string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'", safeCorpName, info.CorpType, info.InfoSource));

                    // All counters stay 0 when no existing row is found, so "!= -1" is true for new enterprises.
                    int qualCount = 0, leaderCount = 0, awardCount = 0, certCount = 0, punishCount = 0, seclicCount = 0, seclicstaffCount = 0, tecstaffCount = 0, deviceCount = 0, resultCount = 0, infoCount = 0;
                    if (obj != null && obj.ToString() != "")
                    {
                        // Remove the previous snapshot: child tables first, then the CorpInfo row itself.
                        string id = obj.ToString();
                        qualCount        = ToolCoreDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", id));
                        leaderCount      = ToolCoreDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", id));
                        awardCount       = ToolCoreDb.ExecuteSql(string.Format("delete from CorpAward where CorpId='{0}'", id));
                        certCount        = ToolCoreDb.ExecuteSql(string.Format("delete from CorpCert where CorpId='{0}'", id));
                        punishCount      = ToolCoreDb.ExecuteSql(string.Format("delete from CorpPunish where CorpId='{0}'", id));
                        seclicCount      = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLic where CorpId='{0}'", id));
                        seclicstaffCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLicStaff where CorpId='{0}'", id));
                        tecstaffCount    = ToolCoreDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", id));
                        deviceCount      = ToolCoreDb.ExecuteSql(string.Format("delete from CorpDevice where CorpId='{0}'", id));
                        resultCount      = ToolCoreDb.ExecuteSql(string.Format("delete from CorpResults where CorpId='{0}'", id));
                        infoCount        = ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
                    }

                    // Save only when the old row is gone (or never existed); re-add each child category
                    // unless its delete returned -1.
                    if (infoCount != -1 && ToolDb.SaveEntity(info, string.Empty))
                    {
                        if (qualCount != -1)
                        {
                            AddCorpQual(info, htmldtl);
                        }
                        if (awardCount != -1)
                        {
                            AddCorpAward(info, htmldtl);
                        }
                        if (certCount != -1)
                        {
                            AddCorpCert(info, htmldtl);
                        }
                        if (deviceCount != -1)
                        {
                            AddCorpDevice(info, htmldtl);
                        }
                        if (punishCount != -1)
                        {
                            AddCorpPunish(info, htmldtl);
                        }
                        if (resultCount != -1)
                        {
                            AddCorpResults(info, htmldtl);
                        }
                        if (seclicCount != -1)
                        {
                            AddCorpSecLic(info, htmldtl);
                        }
                        if (seclicstaffCount != -1)
                        {
                            AddCorpSecLicStaff(info, htmldtl);
                        }
                        if (tecstaffCount != -1)
                        {
                            AddCorpTecStaff(info, htmldtl);
                        }
                        if (leaderCount != -1)
                        {
                            AddCorpLeader(info, htmldtl);
                        }
                    }

                    // Throttle: once the counter reaches 90, reset it and pause for 700000 ms (about 11.7 minutes).
                    count++;
                    if (count >= 90)
                    {
                        count = 1;
                        Thread.Sleep(700000);
                    }
                }
            }
            ToolCoreDb.ExecuteProcedure();
            // Re-link AttenCorp rows to the CorpInfo rows by company name.
            string sql = "update a set a.FkId= c.Id FROM AttenCorp  a left join  CorpInfo c on c.CorpName=A.CorpName";

            ToolDb.ExecuteSql(sql);
            return(null);
        }
示例#14
0
        /// <summary>
        /// Enterprise qualification information: queries the qualification certificates of an enterprise
        /// page by page, loads the detail entries of every certificate and saves one <c>CorpQual</c> per entry.
        /// </summary>
        /// <remarks>
        /// A response that does not deserialize to the expected object/array shape is skipped.
        /// Malformed JSON and records lacking an expected key still raise the exception thrown by the
        /// serializer or the dictionary indexer.
        /// </remarks>
        /// <param name="info">Enterprise whose <c>CorpCode</c> is posted as <c>orgCode</c> and whose <c>Id</c> and <c>Url</c> are stored on each record.</param>
        /// <param name="param">Posted unchanged as the <c>param</c> form field of every request.</param>
        /// <param name="corpType">Posted as the <c>corpType</c> form field of the list request.</param>
        protected void AddCorpQual(CorpInfo info, string param, string corpType)
        {
            string   url        = "http://portal.szjs.gov.cn:8888/publicShow/queryCertificateInfo.html";
            string   urlDtl     = "http://portal.szjs.gov.cn:8888/publicShow/queryCertificateDetail.html";
            string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };

            JavaScriptSerializer java = new JavaScriptSerializer();
            NameValueCollection  nvc  = ToolWeb.GetNameValueCollection(postParams, new string[] { param, corpType, info.CorpCode, "1" });
            string html;

            if (!TryPostCorpQualRequest(url, nvc, out html))
            {
                return;
            }
            Dictionary <string, object> jsonResults = java.DeserializeObject(html) as Dictionary <string, object>;
            if (jsonResults == null)
            {
                return;
            }

            // Only an integer "totalPage" is honoured; anything else leaves the single-page default.
            int    pageInt = 1;
            object totalPage;
            if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
            {
                pageInt = (int)totalPage;
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 was already fetched above; later pages are requested here.
                    nvc = ToolWeb.GetNameValueCollection(postParams, new string[] { param, corpType, info.CorpCode, i.ToString() });
                    if (!TryPostCorpQualRequest(url, nvc, out html))
                    {
                        continue;
                    }
                    jsonResults = java.DeserializeObject(html) as Dictionary <string, object>;
                    if (jsonResults == null)
                    {
                        continue;
                    }
                }

                object recordsValue;
                if (!jsonResults.TryGetValue("records", out recordsValue))
                {
                    continue;
                }
                object[] dicQuals = recordsValue as object[];
                if (dicQuals == null)
                {
                    continue;
                }

                foreach (object dicQual in dicQuals)
                {
                    Dictionary <string, object> dic = dicQual as Dictionary <string, object>;
                    if (dic == null)
                    {
                        continue;
                    }
                    string QualType = Convert.ToString(dic["name"]);
                    string QualCode = Convert.ToString(dic["cert_no"]);
                    string certType = Convert.ToString(dic["cert_type"]);
                    string certId   = Convert.ToString(dic["cert_id"]);
                    string QualSeq  = string.Empty;

                    // The detail request sends the record's cert_type in the "corpType" field.
                    NameValueCollection dtlNvc = ToolWeb.GetNameValueCollection(new string[] { "param", "corpType", "cert_id" }, new string[] { param, certType, certId });
                    string htmldtl;
                    if (!TryPostCorpQualRequest(urlDtl, dtlNvc, out htmldtl))
                    {
                        continue;
                    }
                    object[] dtlQuals = java.DeserializeObject(htmldtl) as object[];
                    if (dtlQuals == null)
                    {
                        continue;
                    }

                    foreach (object objQual in dtlQuals)
                    {
                        Dictionary <string, object> dicDtl = objQual as Dictionary <string, object>;
                        if (dicDtl == null)
                        {
                            continue;
                        }
                        string QualName  = Convert.ToString(dicDtl["name1"]);
                        string QualLevel = Convert.ToString(dicDtl["name2"]);
                        string LicUnit   = Convert.ToString(dicDtl["appr_org"]);
                        string LicDate   = Convert.ToString(dicDtl["appr_date"]);
                        string ValidDate = Convert.ToString(dicDtl["valid_period"]);
                        string qualNum   = QualLevel.GetLevel();

                        CorpQual qual = ToolDb.GenCorpQual(info.Id, QualName, QualCode, QualSeq, QualType, QualLevel, ValidDate, LicDate, LicUnit, info.Url, qualNum, "广东省", "深圳市");
                        ToolDb.SaveEntity(qual, string.Empty);
                    }
                }
            }
        }

        /// <summary>
        /// Posts <paramref name="form"/> to <paramref name="requestUrl"/> as UTF-8; when the call throws,
        /// waits 12 minutes and tries once more.
        /// </summary>
        /// <param name="requestUrl">Address to post to.</param>
        /// <param name="form">Form fields of the request.</param>
        /// <param name="response">Response text on success; an empty string on failure.</param>
        /// <returns><c>true</c> when either attempt succeeds; <c>false</c> after the second failure, which is followed by a further 8 minute wait.</returns>
        private bool TryPostCorpQualRequest(string requestUrl, NameValueCollection form, out string response)
        {
            try
            {
                response = ToolWeb.GetHtmlByUrl(requestUrl, form, Encoding.UTF8);
                return(true);
            }
            catch
            {
                Thread.Sleep(12 * 60 * 1000);
                try
                {
                    response = ToolWeb.GetHtmlByUrl(requestUrl, form, Encoding.UTF8);
                    return(true);
                }
                catch
                {
                    Thread.Sleep(8 * 60 * 1000);
                    response = string.Empty;
                    return(false);
                }
            }
        }
示例#15
0
        /// <summary>
        /// Enterprise safety personnel certificates: queries the safety-certificate holders of an
        /// enterprise page by page and saves one <c>CorpSecLicStaff</c> per returned record.
        /// </summary>
        /// <remarks>
        /// A response that does not deserialize to the expected object/array shape is skipped.
        /// Malformed JSON and records lacking an expected key still raise the exception thrown by the
        /// serializer or the dictionary indexer.
        /// </remarks>
        /// <param name="info">Enterprise whose <c>CorpCode</c> is posted as <c>orgCode</c> and whose <c>Id</c> and <c>Url</c> are stored on each record.</param>
        /// <param name="param">Posted unchanged as the <c>param</c> form field.</param>
        /// <param name="corpType">Posted unchanged as the <c>corpType</c> form field.</param>
        protected void AddCorpSecLicStaff(CorpInfo info, string param, string corpType)
        {
            string   url        = "http://portal.szjs.gov.cn:8888/publicShow/queryPersonSafe.html";
            string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };

            JavaScriptSerializer java = new JavaScriptSerializer();
            NameValueCollection  nvc  = ToolWeb.GetNameValueCollection(postParams, new string[] { param, corpType, info.CorpCode, "1" });
            string html;

            if (!TryPostSecLicStaffRequest(url, nvc, out html))
            {
                return;
            }
            Dictionary <string, object> jsonResults = java.DeserializeObject(html) as Dictionary <string, object>;
            if (jsonResults == null)
            {
                return;
            }

            // Only an integer "totalPage" is honoured; anything else leaves the single-page default.
            int    pageInt = 1;
            object totalPage;
            if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
            {
                pageInt = (int)totalPage;
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 was already fetched above; later pages are requested here.
                    nvc = ToolWeb.GetNameValueCollection(postParams, new string[] { param, corpType, info.CorpCode, i.ToString() });
                    if (!TryPostSecLicStaffRequest(url, nvc, out html))
                    {
                        continue;
                    }
                    jsonResults = java.DeserializeObject(html) as Dictionary <string, object>;
                    if (jsonResults == null)
                    {
                        continue;
                    }
                }

                object recordsValue;
                if (!jsonResults.TryGetValue("records", out recordsValue))
                {
                    continue;
                }
                object[] dicRecords = recordsValue as object[];
                if (dicRecords == null)
                {
                    continue;
                }

                foreach (object dicRecord in dicRecords)
                {
                    Dictionary <string, object> dic = dicRecord as Dictionary <string, object>;
                    if (dic == null)
                    {
                        continue;
                    }
                    string PersonName   = Convert.ToString(dic["name"]);
                    string PersonCertNo = Convert.ToString(dic["lics_id"]);
                    string GrantUnit    = Convert.ToString(dic["issue_dept"]);
                    string GrantDate    = Convert.ToString(dic["issue_date"]);

                    CorpSecLicStaff SecLicStaff = ToolDb.GenCorpSecLicStaff(info.Id, PersonName, PersonCertNo, GrantUnit, GrantDate, info.Url);
                    ToolDb.SaveEntity(SecLicStaff, string.Empty);
                }
            }
        }

        /// <summary>
        /// Posts <paramref name="form"/> to <paramref name="requestUrl"/> as UTF-8; when the call throws,
        /// waits 12 minutes and tries once more.
        /// </summary>
        /// <param name="requestUrl">Address to post to.</param>
        /// <param name="form">Form fields of the request.</param>
        /// <param name="response">Response text on success; an empty string on failure.</param>
        /// <returns><c>true</c> when either attempt succeeds; <c>false</c> after the second failure, which is followed by a further 8 minute wait.</returns>
        private bool TryPostSecLicStaffRequest(string requestUrl, NameValueCollection form, out string response)
        {
            try
            {
                response = ToolWeb.GetHtmlByUrl(requestUrl, form, Encoding.UTF8);
                return(true);
            }
            catch
            {
                Thread.Sleep(12 * 60 * 1000);
                try
                {
                    response = ToolWeb.GetHtmlByUrl(requestUrl, form, Encoding.UTF8);
                    return(true);
                }
                catch
                {
                    Thread.Sleep(8 * 60 * 1000);
                    response = string.Empty;
                    return(false);
                }
            }
        }
示例#16
0
        /// <summary>
        /// Crawls the paged ASP.NET list at <c>SiteUrl</c>, reads every enterprise's detail page and
        /// re-creates its <c>CorpInfo</c> row together with leaders, qualifications, technical staff and
        /// office address.
        /// </summary>
        /// <remarks>
        /// Pages after the first are requested as postbacks of the <c>AspNetPager1</c> control, reusing the
        /// view state and event validation taken from the first page.
        /// </remarks>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>Always <c>null</c>; everything is written straight to the database.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_AspNetPager1")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The last pager link's href ends with ",<n>)"; mark the comma and the closing
                    // parenthesis with sentinels and parse what lies between them as the page count.
                    string temp = pageNode[pageNode.Count - 1].GetATagHref().Replace("&#39;", "").Replace(")", "kdxx").Replace(",", "xxdk");
                    pageInt = int.Parse(temp.GetRegexBegEnd("xxdk", "kdxx"));
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        if (i == 2)
                        {
                            // Captured once, while html still holds the first page.
                            viewState       = ToolWeb.GetAspNetViewState(html);
                            eventValidation = ToolWeb.GetAspNetEventValidation(html);
                        }
                        NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                            new string[] {
                            "ctl00$ContentPlaceHolder1$ScriptManager1",
                            "ctl00$ContentPlaceHolder1$txtORGNAME",
                            "ctl00$ContentPlaceHolder1$txtORGCODE",
                            "ctl00$ContentPlaceHolder1$txtPNAME",
                            "ctl00$ContentPlaceHolder1$txtIDNUM",
                            "ctl00$ContentPlaceHolder1$txtHIREERORGNAME",
                            "ctl00$ContentPlaceHolder1$txtHIREERORGCODE",
                            "ctl00$ContentPlaceHolder1$ddlRegType",
                            "ctl00$ContentPlaceHolder1$ddlTitle",
                            "ctl00$ContentPlaceHolder1$ddlABC",
                            "ctl00$ContentPlaceHolder1$ddlCert",
                            "__VIEWSTATE",
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__EVENTVALIDATION",
                            "__ASYNCPOST"
                        },
                            new string[] {
                            "ctl00$ContentPlaceHolder1$UpdatePanel1|ctl00$ContentPlaceHolder1$AspNetPager1",
                            "", "", "", "", "", "", "", "", "", "",
                            viewState,
                            "ctl00$ContentPlaceHolder1$AspNetPager1",
                            i.ToString(),
                            eventValidation,
                            "true"
                        }
                            );

                        html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }
                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }
                // Row 0 is skipped (the loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    // Cells 0..3 are read below; without this check a shorter row throws, in the
                    // worst case after the enterprise row has already been saved.
                    if (tr.ColumnCount < 4)
                    {
                        continue;
                    }

                    string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty,
                           RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty,
                           BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty,
                           Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty, CorpType,
                           ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;

                    CorpName = tr.Columns[0].ToNodePlainString();
                    LinkMan  = tr.Columns[1].ToNodePlainString();
                    // onclick has the form OpenWin('relative-url'...); keep the part up to the closing quote.
                    cUrl     = tr.Columns[0].GetATagValue("onclick").Replace("OpenWin('", "");
                    if (cUrl.IndexOf("'") > 0)
                    {
                        cUrl = "http://113.108.219.40/intogd/" + cUrl.Remove(cUrl.IndexOf("'"));
                    }
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8);
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }
                    TableTag dtlTable = dtlNode[0] as TableTag;
                    if (dtlTable == null)
                    {
                        continue;
                    }

                    // Flatten the detail table into text: "td-left" cells get ":" appended, all other cells a line break.
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int k = 0; k < dtlTable.RowCount; k++)
                    {
                        for (int d = 0; d < dtlTable.Rows[k].ColumnCount; d++)
                        {
                            TableColumn col = dtlTable.Rows[k].Columns[d];
                            ctxBuilder.Append(col.ToNodePlainString());
                            ctxBuilder.Append(col.GetAttribute("class") == "td-left" ? ":" : "\r\n");
                        }
                    }
                    string ctx = ctxBuilder.ToString();

                    RegDate      = ctx.GetRegex("成立时间,注册时间").GetDateRegex();
                    RegFund      = ctx.GetRegex("注册资本");
                    BusinessCode = ctx.GetRegex("营业执照注册号");
                    CorpType     = "外地进粤企业";
                    CorpAddress  = ctx.GetRegex("注册详细地址");
                    if (!string.IsNullOrEmpty(RegFund) && !RegFund.Contains("万"))
                    {
                        RegFund += "万";
                    }

                    CorpInfo corp = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, CorpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

                    // CorpName comes from scraped HTML: double any single quote so it cannot break out of the SQL literal.
                    string    safeCorpName = (corp.CorpName ?? string.Empty).Replace("'", "''");
                    string    strSql       = string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}'", safeCorpName, corp.CorpType);
                    DataTable dt           = ToolCoreDb.GetDbData(strSql);
                    if (dt != null && dt.Rows.Count > 0)
                    {
                        // Remove the previous snapshot: child tables first, then the CorpInfo row itself.
                        string id = dt.Rows[0]["Id"].ToString();
                        ToolCoreDb.ExecuteSql(string.Format("delete from CorpInstitution where CorpId='{0}'", id));
                        ToolCoreDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", id));
                        ToolCoreDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", id));
                        ToolCoreDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", id));
                        ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
                    }
                    if (ToolDb.SaveEntity(corp, this.ExistCompareFields))
                    {
                        if (!string.IsNullOrEmpty(LinkMan))
                        {
                            CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, LinkMan, "", "企业法定代表人", cUrl);
                            ToolDb.SaveEntity(leader, "");
                        }
                        string techLeaderName = tr.Columns[2].ToNodePlainString();
                        if (!string.IsNullOrEmpty(techLeaderName))
                        {
                            CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, techLeaderName, "", "技术负责人", cUrl);
                            ToolDb.SaveEntity(leader, "");
                        }
                        string localLeaderName = tr.Columns[3].ToNodePlainString();
                        if (!string.IsNullOrEmpty(localLeaderName))
                        {
                            CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, localLeaderName, "", "驻粤负责人", cUrl);
                            ToolDb.SaveEntity(leader, "");
                        }
                        AddCorpQual(corp, htmldtl);
                        AddCorpTecStaff(corp, htmldtl);
                        GetOffAddress(htmldtl, cUrl, corp);
                    }
                }
            }
            return(null);
        }
示例#17
0
        /// <summary>
        /// Crawls the administrative-penalty list, which the site serves as JSON, and saves
        /// every record as a <c>CorpPunish</c> entity together with its attached document.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>
        /// An empty list: records are persisted directly through <c>ToolDb.SaveEntity</c>
        /// and are never added to the returned collection.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new ArrayList();
            string htl      = string.Empty;
            int    sqlCount = 0;
            int    pageInt  = 1;

            try
            {
                htl = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first page there is nothing to crawl.
                return list;
            }

            if (htl.Contains("RowCount"))
            {
                try
                {
                    // The total row count is taken from the text that follows "RowCount" in the payload.
                    int    index   = htl.IndexOf("RowCount");
                    string pageStr = htl.Substring(index).Replace("RowCount", "").Replace("}", "").Replace(":", "").Replace("\"", "");

                    // Ten records per page. Round up explicitly: Convert.ToInt32(decimal) rounds to the
                    // nearest integer, so "round then add one" over-counts whenever the remainder exceeds 5.
                    pageInt = (int)Math.Ceiling(decimal.Parse(pageStr) / 10);
                }
                catch
                {
                    // Best effort: if the count cannot be parsed, only the first page is processed.
                }
            }

            JavaScriptSerializer serializer = new JavaScriptSerializer();
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        htl = ToolWeb.GetHtmlByUrl("http://www.szjs.gov.cn/build/build.ashx?_=1353579439242&menu=%E8%A1%8C%E6%94%BF%E5%A4%84%E7%BD%9A&pageSize=10&pageIndex=" + i.ToString() + "&fileOrg=&fileDate=&fileId=&unitName=&timp=", Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip this page; falling through would re-process the previous page's payload.
                        continue;
                    }
                }

                Dictionary<string, object> smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(htl);
                object dataList;
                if (!smsTypeJson.TryGetValue("DataList", out dataList))
                {
                    continue;
                }

                object[] array = (object[])dataList;
                foreach (object obj2 in array)
                {
                    Dictionary<string, object> dicSmsType = (Dictionary<string, object>)obj2;
                    string DocNo = string.Empty, PunishType = string.Empty, GrantUnit = string.Empty, DocDate = string.Empty, PunishCtx = string.Empty, GrantName = string.Empty, InfoUrl = string.Empty;
                    try
                    {
                        DocNo      = Convert.ToString(dicSmsType["FileId"]);
                        PunishType = Convert.ToString(dicSmsType["PunTypeText"]);
                        GrantUnit  = Convert.ToString(dicSmsType["UnitName"]);
                        DocDate    = Convert.ToString(dicSmsType["ServiceDate"]);
                        InfoUrl    = "http://www.szjs.gov.cn/PUNhtml/" + Convert.ToString(dicSmsType["PunDoc"]);
                        CorpPunish info = ToolDb.GenCorpPunish(string.Empty, DocNo, PunishType, GrantUnit, DocDate, PunishCtx, InfoUrl, GrantName, "1");

                        // Stop the whole crawl once the save budget is exhausted.
                        if (sqlCount > this.MaxCount)
                        {
                            return list;
                        }

                        if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                        {
                            // Attach the penalty document only when the record was actually saved.
                            string file = Convert.ToString(dicSmsType["PunDoc"]);
                            AddBaseFile(InfoUrl, file, info);
                        }
                        sqlCount++;
                    }
                    catch
                    {
                        // A malformed record must not abort the remaining records.
                        continue;
                    }
                }
            }
            return list;
        }
示例#18
0
        /// <summary>
        /// Walks every page of the warning table (the table whose id is "bean") and turns each
        /// data row into a <c>CorpWarning</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as the result list holds <c>MaxCount</c> items.
        /// </param>
        /// <returns>The collected warnings; an empty list if the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  results      = new List<CorpWarning>();
            string pageHtml     = string.Empty;
            int    lastPage     = 1;
            int    rowsSinceNap = 1;

            try
            {
                pageHtml = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch
            {
                return results;
            }

            // The anchor with id "lx" carries the last page number in its href ("page=N&").
            Parser   parser     = new Parser(new Lexer(pageHtml));
            NodeList pagerLinks = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                try
                {
                    lastPage = int.Parse(pagerLinks.GetATagHref().GetRegexBegEnd("page=", "&"));
                }
                catch
                {
                    // Keep the single-page default when the pager cannot be parsed.
                }
            }

            for (int page = 1; page <= lastPage; page++)
            {
                // Page 1 was already downloaded above.
                if (page > 1)
                {
                    try
                    {
                        pageHtml = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + page.ToString(), Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(pageHtml));
                NodeList tables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
                if (tables == null || tables.Count < 1)
                {
                    continue;
                }

                TableTag grid = tables[0] as TableTag;
                // Row 0 is skipped (the loop starts at 1).
                for (int row = 1; row < grid.RowCount; row++)
                {
                    TableRow cells = grid.Rows[row];

                    string warningName   = cells.Columns[2].ToNodePlainString();
                    string totalScore    = cells.Columns[3].ToNodePlainString();
                    string prjNumber     = cells.Columns[4].ToNodePlainString();
                    string resultScore   = cells.Columns[5].ToNodePlainString();
                    string corpType      = cells.Columns[6].ToNodePlainString();
                    string publicEndDate = cells.Columns[7].ToPlainTextString().GetDateRegex();

                    // Fields the page does not provide are passed as empty strings.
                    string none        = string.Empty;
                    string warningType = "半年度黄色警示";
                    string msgType     = "深圳市住房和建设局";
                    string color       = "0";

                    CorpWarning warning = ToolDb.GenCorpWarning("广东省", "深圳市区", "", none, warningName, none, warningType, none, prjNumber, totalScore, resultScore, corpType, publicEndDate, none, none, none, msgType, color);
                    results.Add(warning);

                    if (!crawlAll && results.Count >= this.MaxCount)
                    {
                        return results;
                    }

                    // Pause for eight minutes each time the row counter reaches 200, then restart it.
                    rowsSinceNap++;
                    if (rowsSinceNap >= 200)
                    {
                        rowsSinceNap = 1;
                        Thread.Sleep(8 * 60 * 1000);
                    }
                }
            }
            return results;
        }
示例#19
0
        /// <summary>
        /// Crawls the registered-personnel list page by page, opens each person's detail page
        /// and saves one <c>CorpStaff</c> entity per row.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>
        /// Always <c>null</c>; entities are persisted directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            const int pageSize     = 15;            // matches the "$pgsz" value posted below
            const int retryDelayMs = 60 * 1000 * 6; // six minutes between attempts / throttle pause
            const int maxAttempts  = 3;

            int    count     = 0;
            string html      = string.Empty;
            int    pageInt   = 1;
            int    totalPage = 0; // despite the name, this holds the total record count posted as "$total"

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return null;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "clearfix")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // NOTE(review): as written this Replace is a no-op; it may originally have
                    // normalized a full-width comma - confirm against the upstream source.
                    string temp = pageNode.AsString().Replace(",", ",");
                    string page = temp.GetRegexBegEnd("total", ",").GetReplace("\":");
                    totalPage = int.Parse(page);

                    // Ceiling division: "total / 15 + 1" asked for one page too many whenever the
                    // total was an exact multiple of the page size. Never drop below one page.
                    pageInt = Math.Max(1, (totalPage + pageSize - 1) / pageSize);
                }
                catch
                {
                    // Keep the single-page default when the pager cannot be parsed.
                }
            }

            for (int p = 1; p <= pageInt; p++)
            {
                if (p > 1)
                {
                    // Kept from the original: the page number is written to the error log.
                    Logger.Error(p);
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                        new string[] { "$total", "$reload", "$pg", "$pgsz" },
                        new string[] { totalPage.ToString(), "0", p.ToString(), "15" });

                    // Up to three attempts, sleeping before the second and third.
                    bool pageFetched = false;
                    for (int attempt = 0; attempt < maxAttempts && !pageFetched; attempt++)
                    {
                        try
                        {
                            if (attempt > 0)
                            {
                                Thread.Sleep(retryDelayMs);
                            }
                            html        = ToolWeb.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                            pageFetched = true;
                        }
                        catch
                        {
                            // Try again, or give up on this page after the last attempt.
                        }
                    }
                    if (!pageFetched)
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_box responsive personal")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                // The first and the last row of the table are skipped.
                for (int i = 1; i < table.RowCount - 1; i++)
                {
                    TableRow tr = table.Rows[i];

                    string Name = string.Empty, Sex = string.Empty, CredType = string.Empty, IdNum = string.Empty, CorpName = string.Empty, CorpCode = string.Empty, CertCode = string.Empty, CertGrade = string.Empty, RegLevel = string.Empty, RegCode = string.Empty, AuthorUnit = string.Empty, PersonType = string.Empty, Url = string.Empty, Profession = string.Empty, staffNum = string.Empty, IssuanceTime = string.Empty, Organ = string.Empty;

                    Name       = tr.Columns[1].ToNodePlainString();
                    IdNum      = tr.Columns[2].ToNodePlainString();
                    CertGrade  = tr.Columns[3].ToNodePlainString();
                    RegCode    = tr.Columns[4].ToNodePlainString();
                    PersonType = tr.Columns[5].ToNodePlainString();

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // No detail link in this row; skip it instead of aborting the whole crawl.
                        continue;
                    }
                    Url = "http://jzsc.mohurd.gov.cn" + aTag.Link;

                    // Same retry policy as for the list pages.
                    string htmldtl       = string.Empty;
                    bool   detailFetched = false;
                    for (int attempt = 0; attempt < maxAttempts && !detailFetched; attempt++)
                    {
                        try
                        {
                            if (attempt > 0)
                            {
                                Thread.Sleep(retryDelayMs);
                            }
                            htmldtl       = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8).GetJsString();
                            detailFetched = true;
                        }
                        catch
                        {
                            // Try again, or give up on this row after the last attempt.
                        }
                    }
                    if (!detailFetched)
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "activeTinyTabContent")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                        Sex = ctx.GetRegex("性别");
                    }

                    // Rewind so the same detail document can be searched a second time.
                    parser.Reset();
                    dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "regcert_tab")));
                    if (dtlNode != null && dtlNode.Count > 0)
                    {
                        string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                        CertCode = ctx.GetRegex("证书编号");
                        ATag nameTag = dtlNode.GetATag(1);
                        if (nameTag != null)
                        {
                            CorpName = nameTag.LinkText.ToNodeString();
                        }
                    }

                    CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "全国", "", "中华人民共和国住房和城乡建设部建筑市场监管司", Url, Profession, staffNum, IssuanceTime, Organ, "");
                    ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);

                    // Pause after every 28 saved rows.
                    count++;
                    if (count >= 28)
                    {
                        count = 0;
                        Thread.Sleep(retryDelayMs);
                    }
                }
            }
            return null;
        }
示例#20
0
        /// <summary>
        /// Crawls the tender-announcement list pages, opens each announcement and builds one
        /// <c>InviteInfo</c> per item whose detail page contains an "article-info" block.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as the result list holds <c>MaxCount</c> items.
        /// </param>
        /// <returns>The collected announcements; an empty list if the first page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List<InviteInfo>();
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "wb-page-li")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page total is the text between "/" and the following carriage return.
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "\r");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Keep the single-page default when the pager cannot be parsed.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                // NOTE(review): this overwrites the SiteUrl property on every page, so it no longer
                // holds its initial value after the crawl. Kept as-is because other code may rely on it.
                SiteUrl = "http://www.jxsggzy.cn/web/jyxx/002002/002002002/" + i + ".html";
                try
                {
                    html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
                }
                catch
                {
                    continue;
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "ewb-list-node clearfix")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty,
                           prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty,
                           specType = string.Empty, beginDate = string.Empty, endDate = string.Empty,
                           remark = string.Empty, InfoUrl = string.Empty,
                           msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // List item without a link: there is no detail page to follow.
                        continue;
                    }

                    // Prefer the title attribute; fall back to the visible link text.
                    prjName = aTag.GetAttribute("title");
                    if (string.IsNullOrWhiteSpace(prjName))
                    {
                        prjName = aTag.LinkText;
                    }
                    beginDate = listNode[j].ToPlainTextString().GetDateRegex();

                    // If the third character is a county/district/city suffix, the first three
                    // characters are used as the area. The length check prevents an
                    // IndexOutOfRangeException on titles shorter than three characters.
                    if (!string.IsNullOrEmpty(prjName) && prjName.Length > 2
                        && (prjName[2] == '县' || prjName[2] == '区' || prjName[2] == '市'))
                    {
                        area = prjName.Substring(0, 3);
                    }

                    InfoUrl = "http://www.jxsggzy.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-info")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    HtmlTxt    = dtlNode.AsHtml();
                    inviteCtx  = HtmlTxt.ToCtxString();
                    prjAddress = inviteCtx.GetAddressRegex();
                    if (string.IsNullOrWhiteSpace(prjAddress))
                    {
                        prjAddress = inviteCtx.GetRegexBegEnd("地址:", "地址:");
                    }

                    buildUnit = inviteCtx.GetBuildRegex();
                    // Discard a match that contains the "单位章" placeholder.
                    if (buildUnit.Contains("单位章"))
                    {
                        buildUnit = string.Empty;
                    }
                    // Cut off everything from "联系人" onwards.
                    if (buildUnit.Contains("联系人"))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系人"));
                    }

                    code       = inviteCtx.GetCodeRegex();
                    inviteType = "交通工程";
                    specType   = "政府采购";
                    msgType    = "江西省公共资源交易中心";
                    InviteInfo info = ToolDb.GenInviteInfo("江西省", "江西省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
示例#21
0
        /// <summary>
        /// Crawls an ASP.NET WebForms grid once per "DropDownList2" option (1 to 11, then
        /// 999999999), posting back for the first page of each option and then for every
        /// following page. Each response is handed to <c>DealHtml</c> together with the result list.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops once the result list holds <c>MaxCount</c> items
        /// (checked after the first page and after the last page of each option).
        /// </param>
        /// <returns>The list passed to <c>DealHtml</c>; empty if the landing page cannot be fetched.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html  = string.Empty;
            string html1 = string.Empty;
            IList  list  = new ArrayList();

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
                return list;
            }

            Parser parser          = new Parser(new Lexer(html));
            string cookiestr       = string.Empty;
            string viewState       = ToolWeb.GetAspNetViewState(parser);
            string eventValidation = ToolWeb.GetAspNetEventValidation(parser);

            for (int i = 1; i <= 12; i++)
            {
                // The twelfth option is posted as the value "999999999" instead of "12".
                string ddlIndex = (i == 12) ? "999999999" : i.ToString();

                NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "drpRpt", "DropDownList2", "txtCorpName", "DropDownList1", "GridViewPaging1$txtGridViewPagingForwardTo", "__EVENTVALIDATION" }, new string[] { "UpdatePanel1|DropDownList2", "DropDownList2", string.Empty, string.Empty, viewState, "419425", ddlIndex, string.Empty, "-1", "1", eventValidation });
                try
                {
                    html  = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    html1 = html;
                    // Process the first page.
                    DealHtml(list, html, crawlAll, ddlIndex);
                }
                catch (Exception ex)
                {
                    Logger.Error(ex);
                }

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }

                // Read the page count from the pager description label.
                int pageInt = 1;
                parser = new Parser(new Lexer(html));
                NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "GridViewPaging1_lblGridViewPagingDesc")));
                // Require at least one match before indexing; a null check alone does not
                // protect tdNodes[0] when the label is missing from the response.
                if (tdNodes != null && tdNodes.Count > 0)
                {
                    string pageTemp = tdNodes[0].ToPlainTextString().Trim();
                    try
                    {
                        pageTemp = pageTemp.Substring(pageTemp.IndexOf("页,共")).Replace("页,共", string.Empty).Replace("页", string.Empty);
                        pageInt  = int.Parse(pageTemp);
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex);
                    }
                }
                parser.Reset();

                // Process the following pages.
                if (pageInt > 1)
                {
                    // NOTE(review): every follow-up request reuses the view state of the first page
                    // (html1), not of the most recent response - confirm this is what the site expects.
                    string viewStatePage       = ToolWeb.GetAspNetViewState(html1);
                    string eventValidationPage = ToolWeb.GetAspNetEventValidation(html1);

                    for (int j = 2; j <= pageInt; j++)
                    {
                        NameValueCollection nvcPage = null;
                        if (j == 14 && ddlIndex.Equals("2"))
                        {
                            // Special case kept from the original: for option "2", page 14 is not
                            // requested; the crawler jumps straight to page 15 with "forward to page".
                            j++;
                            nvcPage = ToolWeb.GetNameValueCollection(new string[] { "ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "drpRpt", "DropDownList2", "txtCorpName", "DropDownList1", "GridViewPaging1$txtGridViewPagingForwardTo", "__EVENTVALIDATION", "GridViewPaging1$btnForwardToPage" }, new string[] { "UpdatePanel1|GridViewPaging1$btnForwardToPage", string.Empty, string.Empty, string.Empty, viewStatePage, "419425", ddlIndex, string.Empty, "-1", j.ToString(), eventValidationPage, "Go" });
                        }
                        else
                        {
                            // Regular case: post the "next" button with the current page (j - 1) in the pager box.
                            nvcPage = ToolWeb.GetNameValueCollection(new string[] { "ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "drpRpt", "DropDownList2", "txtCorpName", "DropDownList1", "GridViewPaging1$txtGridViewPagingForwardTo", "__EVENTVALIDATION", "GridViewPaging1$btnNext.x", "GridViewPaging1$btnNext.y" }, new string[] { "UpdatePanel1|GridViewPaging1$btnNext", string.Empty, string.Empty, string.Empty, viewStatePage, "419425", ddlIndex, string.Empty, "-1", (j - 1).ToString(), eventValidationPage, "6", "10" });
                        }

                        try
                        {
                            html = ToolWeb.GetHtmlByUrl(SiteUrl, nvcPage, Encoding.UTF8, ref cookiestr);
                            // Process this follow-up page.
                            DealHtml(list, html, crawlAll, ddlIndex);
                        }
                        catch (Exception ex)
                        {
                            Logger.Error(ex);
                            continue;
                        }
                    }
                }
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }
            return list;
        }
示例#22
0
        /// <summary>
        /// Enterprise technical staff: queries the paged JSON endpoint and saves one
        /// <c>CorpTecStaff</c> entity per returned record.
        /// </summary>
        /// <param name="info">Company whose <c>Id</c>, <c>Url</c> and <c>CorpCode</c> are used for the query and the saved rows.</param>
        /// <param name="param">Value posted as the "param" form field.</param>
        /// <param name="corpType">Value posted as the "corpType" form field.</param>
        protected void AddCorpTecStaff(CorpInfo info, string param, string corpType)
        {
            string url = "http://portal.szjs.gov.cn:8888/publicShow/queryTechnology.html";

            string[]            postParams = new string[] { "param", "corpType", "orgCode", "page" };
            string[]            postValues = new string[] { param, corpType, info.CorpCode, "1" };
            NameValueCollection nvc        = ToolWeb.GetNameValueCollection(postParams, postValues);
            string html = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch
            {
                // Wait twelve minutes, then retry once.
                Thread.Sleep(12 * 60 * 1000);
                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                }
                catch
                {
                    // Second failure: wait another eight minutes and give up on this company.
                    Thread.Sleep(8 * 60 * 1000);
                    return;
                }
            }

            JavaScriptSerializer java = new JavaScriptSerializer();
            // "as" yields null for a payload that is not a JSON object; this is handled below.
            Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
            int pageInt = 1;

            try
            {
                pageInt = (int)jsonResults["totalPage"];
            }
            catch
            {
                // Missing or non-integer "totalPage" (or no JSON object at all): assume one page.
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
                    nvc        = ToolWeb.GetNameValueCollection(postParams, postValues);
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // NOTE(review): the retry waits 12 + 8 minutes in total, unlike the
                        // first-page retry above. Timing kept exactly as in the original.
                        Thread.Sleep(12 * 60 * 1000);
                        try
                        {
                            Thread.Sleep(8 * 60 * 1000);
                            html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                        }
                        catch
                        {
                            continue;
                        }
                    }
                    jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
                }

                // Skip the page when the response carries no usable "records" array,
                // instead of failing with a null reference or a missing key.
                object recordsValue;
                if (jsonResults == null || !jsonResults.TryGetValue("records", out recordsValue))
                {
                    continue;
                }
                object[] dicRecords = recordsValue as object[];
                if (dicRecords == null)
                {
                    continue;
                }

                foreach (object dicRecord in dicRecords)
                {
                    Dictionary<string, object> dic = dicRecord as Dictionary<string, object>;
                    if (dic == null)
                    {
                        continue;
                    }

                    string StaffName = Convert.ToString(dic["name"]);
                    string stffType  = Convert.ToString(dic["typename"]);
                    string CertNo    = Convert.ToString(dic["alt_cert_id"]);
                    string CertLevel = Convert.ToString(dic["alt_qual_lv"]);
                    string IdCard    = Convert.ToString(dic["id_number"]);

                    CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, StaffName, IdCard, CertLevel, CertNo, info.Url, stffType);
                    ToolDb.SaveEntity(staff, string.Empty);
                }
            }
        }
示例#23
0
        /// <summary>
        /// Crawls the paged qualification table (class "list-table"). For each row it looks up
        /// the company by name; unknown companies are created together with their qualification,
        /// known companies only get the qualification saved/updated.
        /// </summary>
        /// <param name="crawlAll">Not consulted by this implementation.</param>
        /// <returns>
        /// <c>null</c> if the first page cannot be fetched; otherwise an empty list, because
        /// entities are persisted directly through <c>ToolDb.SaveEntity</c>.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List<CorpInfo>();
            string html = string.Empty;
            int    pageInt = 1, count = 0;

            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return null;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "center")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // Pager text: record count precedes "条", page count sits between "/" and "页".
                    string countTemp = pageNode.AsString().GetRegexBegEnd("\r", "条").Replace("&nbsp;", "").Replace("\r", "").Replace("\n", "");
                    string temp      = pageNode.AsString().GetRegexBegEnd("/", "页").Replace("&nbsp;", "");
                    pageInt = int.Parse(temp);
                    count   = int.Parse(countTemp);
                }
                catch
                {
                    // Keep whatever was parsed so far; defaults are one page and a count of zero.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                        new string[] { "datainfo_id", "datainfo_action", "count", "pages", "page", "dwmc", "zzdj", "zsbh", "szss" },
                        new string[] { string.Empty, string.Empty, count.ToString(), pageInt.ToString(), i.ToString(), string.Empty, string.Empty, string.Empty, string.Empty }
                        );
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "list-table")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                // Row 0 is skipped (the loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    string   QualName = string.Empty, QualCode = string.Empty, QualSeq = string.Empty, qualNum = string.Empty, QualType = string.Empty, QualLevel = string.Empty, ValidDate = string.Empty, LicDate = string.Empty, LicUnit = string.Empty;

                    QualName  = "计算机信息系统集成";
                    QualCode  = tr.Columns[3].ToNodePlainString();
                    QualLevel = tr.Columns[2].ToNodePlainString();
                    LicDate   = tr.Columns[5].ToPlainTextString().GetDateRegex();
                    qualNum   = QualLevel.GetLevel();
                    string corpName = tr.Columns[1].ToNodePlainString();
                    string city     = tr.Columns[6].ToNodePlainString();

                    // The name comes from scraped HTML and is embedded in a SQL string literal:
                    // double any single quote so it cannot terminate the literal.
                    string escapedCorpName = (corpName ?? string.Empty).Replace("'", "''");
                    object isCorp          = ToolDb.ExecuteScalar("select Id from CorpInfo where CorpName='" + escapedCorpName + "'");

                    if (isCorp == null || isCorp.ToString() == "")
                    {
                        // Unknown company: create it first, then attach the qualification to the new record.
                        string CorpCode = string.Empty, CorpAddress = string.Empty,
                               RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty,
                               BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty,
                               Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty,
                               ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;
                        CorpInfo info = ToolDb.GenCorpInfo(corpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, string.Empty, city, city, "中华人民共和国工业和信息化部", this.SiteUrl, ISOQualNum, ISOEnvironNum, OffAdr);
                        if (ToolDb.SaveEntity(info, null))
                        {
                            CorpQual qual = ToolDb.GenCorpQual(info.Id, QualName, QualCode, QualSeq, QualType, QualLevel, ValidDate, LicDate, LicUnit, info.Url, qualNum, city, city);
                            ToolDb.SaveEntity(qual, "");
                        }
                    }
                    else
                    {
                        // Known company: save the qualification against the existing id, using
                        // QualCode, CorpId and QualName as the compare fields.
                        CorpQual qual = ToolDb.GenCorpQual(isCorp.ToString(), QualName, QualCode, QualSeq, QualType, QualLevel, ValidDate, LicDate, LicUnit, this.SiteUrl, qualNum, city, city);
                        ToolDb.SaveEntity(qual, "QualCode,CorpId,QualName", true);
                    }
                }
            }
            return list;
        }
示例#24
0
        /// <summary>
        /// Saves company performance records: queries the performance endpoint page by page and
        /// stores one <c>CorpResults</c> row for every entry of each page's "records" array.
        /// </summary>
        /// <param name="info">Company being processed; its Id, CorpCode and Url are used.</param>
        /// <param name="param">Value posted as the "param" form field.</param>
        /// <param name="corpType">Value posted as the "corpType" form field.</param>
        protected void AddCorpResults(CorpInfo info, string param, string corpType)
        {
            string url = "http://portal.szjs.gov.cn:8888/publicShow/queryPerformance.html";

            string[]             postParams = new string[] { "param", "corpType", "orgCode", "page" };
            JavaScriptSerializer java       = new JavaScriptSerializer();
            int pageInt = 1;

            for (int i = 1; i <= pageInt; i++)
            {
                string[]            postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
                NameValueCollection nvc        = ToolWeb.GetNameValueCollection(postParams, postValues);
                string html = string.Empty;

                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                }
                catch
                {
                    // First attempt failed: wait 12 minutes, then retry once.
                    Thread.Sleep(12 * 60 * 1000);
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // Retry failed too: wait 8 more minutes and give up on this page.
                        // If this is page 1, pageInt is still 1, so the loop (and the method) ends here.
                        Thread.Sleep(8 * 60 * 1000);
                        continue;
                    }
                }

                if (string.IsNullOrEmpty(html))
                {
                    continue;
                }

                // The deserializer may hand back null or a non-object value for an empty or
                // unexpected payload; such a page has nothing to read.
                Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
                if (jsonResults == null)
                {
                    continue;
                }

                if (i == 1)
                {
                    // The page count is taken from the first page only; it stays 1 when absent or not an int.
                    object totalPage;
                    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
                    {
                        pageInt = (int)totalPage;
                    }
                }

                object recordsValue;
                jsonResults.TryGetValue("records", out recordsValue);
                object[] dicRecords = recordsValue as object[];
                if (dicRecords == null)
                {
                    continue;
                }

                // The content of each record is not read: every entry is saved with the same
                // placeholder name/code/unit and today's date.
                for (int r = 0; r < dicRecords.Length; r++)
                {
                    string PrjName = string.Empty, PrjCode = string.Empty, BuildUnit = string.Empty, GrantDate = string.Empty, PrjAddress = string.Empty, ChargeDept = string.Empty, PrjClassLevel = string.Empty, PrjClass = string.Empty, BuildArea = string.Empty, InviteArea = string.Empty, ProspUnit = string.Empty, DesignUnit = string.Empty, SuperUnit = string.Empty, ConstUnit = string.Empty, PrjStartDate = string.Empty, PrjEndDate = string.Empty;

                    PrjName   = "业绩";
                    PrjCode   = "业绩";
                    BuildUnit = "业绩";
                    GrantDate = DateTime.Today.ToString();

                    CorpResults result = ToolDb.GenCorpResults(info.Id, PrjName, PrjCode, BuildUnit, GrantDate, PrjAddress, ChargeDept, PrjClassLevel, PrjClass, BuildArea, InviteArea, ProspUnit, DesignUnit, SuperUnit, ConstUnit, PrjStartDate, PrjEndDate, info.Url);

                    ToolDb.SaveEntity(result, string.Empty);
                }
            }
        }
示例#25
0
文件: Info.cs 项目: SHNXJMG/Small
        /// <summary>
        /// Crawls the announcement list at <c>SiteUrl</c> page by page, downloads the detail page of
        /// every list entry and saves a <c>BidInfo</c> entity for each detail page that contains the
        /// "dbDetailFV" table.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>The list created at the start of the method; nothing is added to it here, so it is empty.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<BidInfo>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "myPages_input")), true), new TagNameFilter("option")));

            if (pageList != null && pageList.Count > 0)
            {
                // The value of the last <option> of the page selector is used as the page count.
                try
                {
                    OptionTag opt  = pageList[pageList.Count - 1] as OptionTag;
                    string    temp = opt.GetAttribute("value");
                    pageInt = int.Parse(temp);
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&pageNo=" + i.ToString(), Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("dl"), new HasAttributeFilter("class", "i-news")), true), new TagNameFilter("dd")));
                if (nodeList == null || nodeList.Count < 1)
                {
                    continue;
                }
                for (int j = 0; j < nodeList.Count; j++)
                {
                    string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

                    beginDate = nodeList[j].ToPlainTextString().GetDateRegex();

                    // The list entry text carries the date and square brackets; strip both from the title.
                    string title = nodeList[j].ToNodePlainString();
                    if (!string.IsNullOrEmpty(beginDate))
                    {
                        title = title.Replace(beginDate, "");
                    }
                    prjName = title.Replace("[", "").Replace("]", "");
                    prjName = prjName.GetBidPrjName();
                    bidType = prjName.GetInviteBidType();
                    InfoUrl = "http://www.chinapsp.cn/cn/info.aspx" + nodeList[j].GetATagHref();

                    string htldtl = string.Empty;
                    try
                    {
                        htldtl = ToolWeb.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dbDetailFV")));
                    if (dtList == null || dtList.Count < 1)
                    {
                        continue;
                    }

                    HtmlTxt = dtList.AsHtml();
                    bidCtx  = HtmlTxt.ToLower().Replace("<tr>", "\r\n").Replace("</tr>", "\r\n").ToCtxString();
                    if (prjName.Contains("招标编号") || prjName.Contains("项目编号"))
                    {
                        // Cut the title where the tender/project number starts. Priority: ASCII
                        // parenthesis, full-width parenthesis, then the label itself. The
                        // full-width check replaces a duplicated (unreachable) ASCII check.
                        int cutAt = prjName.IndexOf('(');
                        if (cutAt == -1)
                        {
                            cutAt = prjName.IndexOf('（');
                        }
                        if (cutAt == -1)
                        {
                            cutAt = prjName.IndexOf("招标编号", StringComparison.Ordinal);
                        }
                        if (cutAt == -1)
                        {
                            cutAt = prjName.IndexOf("项目编号", StringComparison.Ordinal);
                        }
                        if (cutAt != -1)
                        {
                            prjName = prjName.Remove(cutAt);
                        }
                    }
                    buildUnit  = bidCtx.GetBuildRegex();
                    // NOTE(review): prjAddress is computed but not passed to GenBidInfo below.
                    prjAddress = bidCtx.Replace(" ", "").GetAddressRegex();

                    bidUnit  = bidCtx.GetBidRegex();
                    bidMoney = bidCtx.GetMoneyRegex();
                    if (!string.IsNullOrEmpty(bidMoney))
                    {
                        decimal money = Convert.ToDecimal(bidMoney);
                        if (money > 10000)
                        {
                            bidMoney = Convert.ToString(money / 10000);
                        }
                    }
                    if (bidMoney == "0")
                    {
                        // Nothing found by the default pattern: try the alternative extraction.
                        bidMoney = bidCtx.GetMoneyRegex(null, true);
                        if (string.IsNullOrEmpty(bidMoney))
                        {
                            bidMoney = "0";
                        }
                    }
                    // NOTE(review): this scaling also runs on a value already scaled above, so an
                    // amount over 100,000,000 is divided by 10000 twice - confirm that is intended.
                    if (!string.IsNullOrEmpty(bidMoney))
                    {
                        decimal money = Convert.ToDecimal(bidMoney);
                        if (money > 10000)
                        {
                            bidMoney = Convert.ToString(money / 10000);
                        }
                    }
                    if (bidMoney == "0")
                    {
                        bidMoney = bidCtx.ToLower().GetMoneyRegex(new string[] { "rmb" });
                    }
                    if (string.IsNullOrEmpty(bidUnit) && bidMoney == "0")
                    {
                        // A failed tender has no winner; record that explicitly.
                        if (bidCtx.Contains("采购失败") || bidCtx.Contains("本项目招标失败"))
                        {
                            bidUnit  = "没有中标商";
                            bidMoney = "0";
                        }
                    }
                    code     = bidCtx.GetCodeRegex().GetChina();
                    specType = "其他";
                    msgType  = "广东采联采购招标有限公司";
                    prjName  = prjName.GetBidPrjName();
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                    ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx, " and LastModifier ='00000000000000000000000000000000'");
                }
            }
            return(list);
        }
示例#26
0
        /// <summary>
        /// Saves company award records: queries the prizes endpoint page by page and stores one
        /// <c>CorpAward</c> row for every entry of each page's "records" array.
        /// </summary>
        /// <param name="info">Company being processed; its Id, CorpCode and Url are used.</param>
        /// <param name="param">Value posted as the "param" form field.</param>
        /// <param name="corpType">Value posted as the "corpType" form field.</param>
        protected void AddCorpAward(CorpInfo info, string param, string corpType)
        {
            string url = "http://portal.szjs.gov.cn:8888/publicShow/queryPrizes.html";

            string[]             postParams = new string[] { "param", "corpType", "orgCode", "page" };
            JavaScriptSerializer java       = new JavaScriptSerializer();
            int pageInt = 1;

            for (int i = 1; i <= pageInt; i++)
            {
                string[]            postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
                NameValueCollection nvc        = ToolWeb.GetNameValueCollection(postParams, postValues);
                string html = string.Empty;

                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                }
                catch
                {
                    // First attempt failed: wait 12 minutes, then retry once.
                    Thread.Sleep(12 * 60 * 1000);
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // Retry failed too: wait 8 more minutes and give up on this page.
                        // If this is page 1, pageInt is still 1, so the loop (and the method) ends here.
                        Thread.Sleep(8 * 60 * 1000);
                        continue;
                    }
                }

                if (string.IsNullOrEmpty(html))
                {
                    continue;
                }

                // The deserializer may hand back null or a non-object value for an empty or
                // unexpected payload; such a page has nothing to read.
                Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
                if (jsonResults == null)
                {
                    continue;
                }

                if (i == 1)
                {
                    // The page count is taken from the first page only; it stays 1 when absent or not an int.
                    object totalPage;
                    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
                    {
                        pageInt = (int)totalPage;
                    }
                }

                object recordsValue;
                jsonResults.TryGetValue("records", out recordsValue);
                object[] dicRecords = recordsValue as object[];
                if (dicRecords == null)
                {
                    continue;
                }

                foreach (object dicRecord in dicRecords)
                {
                    // Skip entries that are not JSON objects.
                    Dictionary<string, object> dic = dicRecord as Dictionary<string, object>;
                    if (dic == null)
                    {
                        continue;
                    }

                    string AwardName   = Convert.ToString(dic["award_name"]);
                    string AwardDate   = Convert.ToString(dic["award_date"]);
                    string AwardLevel  = Convert.ToString(dic["award_lvl"]);
                    string GrantUnit   = Convert.ToString(dic["award_org"]);
                    string ProjectName = Convert.ToString(dic["rel_prj"]);

                    CorpAward award = ToolDb.GenCorpAward(info.Id, AwardName, AwardDate, AwardLevel, GrantUnit, ProjectName, info.Url);
                    ToolDb.SaveEntity(award, string.Empty);
                }
            }
        }
示例#27
0
        /// <summary>
        /// Saves the technical management staff of a company. Posts the "step3" async postback to
        /// <c>info.Url</c>, reads the staff rows from the first "data-grid" table of the response,
        /// fetches each staff member's detail page to extract the certificate number, and stores
        /// one <c>CorpTecStaff</c> per row.
        /// </summary>
        /// <param name="info">Company being processed; its Id and Url are used.</param>
        /// <param name="html">Page HTML from which the ASP.NET view state and event validation values are taken.</param>
        private void AddCorpTecStaff(CorpInfo info, string html)
        {
            string cookiestr       = string.Empty;
            string viewState       = ToolWeb.GetAspNetViewState(html);
            string eventValidation = ToolWeb.GetAspNetEventValidation(html);

            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                "ctl00$MainContent$ScriptManager1",
                "__EVENTTARGET",
                "__EVENTARGUMENT",
                "__VIEWSTATE",
                "__EVENTVALIDATION",
                "__ASYNCPOST"
            }, new string[] {
                "ctl00$MainContent$UpdatePanel1|ctl00$MainContent$step3",
                "ctl00$MainContent$step3",
                "",
                viewState,
                eventValidation,
                "true"
            });
            string htmldtl = string.Empty;

            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(info.Url, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch { } // best effort: on failure htmldtl stays empty and is parsed as such

            // NOTE: only this single response is processed. An earlier paging loop over the
            // "MainContent_AspNetPager1" pager was disabled, so further grid pages are not followed.
            Parser   parser  = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));

            if (dtlNode == null || dtlNode.Count < 1)
            {
                return;
            }
            TableTag table = dtlNode[0] as TableTag;
            if (table == null)
            {
                return;
            }

            // Row 0 is skipped (the loop starts at 1).
            for (int j = 1; j < table.RowCount; j++)
            {
                TableRow tr = table.Rows[j];
                if (tr.ColumnCount < 2)
                {
                    // Columns 0 and 1 are read below; a shorter row cannot be a staff row.
                    continue;
                }

                string StaffName = string.Empty, IdCard = string.Empty, CertLevel = string.Empty, CertNo = string.Empty, stffType = string.Empty;
                StaffName = tr.Columns[0].ToNodePlainString();
                stffType  = tr.Columns[1].ToNodePlainString();
                string aHref    = "http://113.108.219.40/intogd/Open/" + tr.Columns[0].GetATagHref();
                string staffDtl = string.Empty;
                try
                {
                    staffDtl = ToolWeb.GetHtmlByUrl(aHref, Encoding.UTF8);
                }
                catch { } // best effort: the staff row is still saved, just without CertNo

                parser = new Parser(new Lexer(staffDtl));
                NodeList staffNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));

                // The second matching table is read, so at least two matches are required
                // (the previous "Count > 0" check allowed an out-of-range access on index 1).
                if (staffNode != null && staffNode.Count > 1)
                {
                    TableTag dtlTable = staffNode[1] as TableTag;
                    if (dtlTable != null)
                    {
                        // Flatten the table into "label:value\r\n" text: "td-left" cells are
                        // followed by ':' and all other cells by a line break.
                        string ctx = string.Empty;
                        for (int k = 0; k < dtlTable.RowCount; k++)
                        {
                            for (int d = 0; d < dtlTable.Rows[k].ColumnCount; d++)
                            {
                                TableColumn col = dtlTable.Rows[k].Columns[d];
                                if (col.GetAttribute("class") == "td-left")
                                {
                                    ctx += col.ToNodePlainString() + ":";
                                }
                                else
                                {
                                    ctx += col.ToNodePlainString() + "\r\n";
                                }
                            }
                        }
                        CertNo = ctx.GetRegex("职称证号");
                    }
                }

                CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, StaffName, IdCard, CertLevel, CertNo, info.Url, stffType);
                ToolDb.SaveEntity(staff, string.Empty);
            }
        }
示例#28
0
        /// <summary>
        /// Saves company penalty records: queries the punish endpoint page by page and stores one
        /// <c>CorpPunish</c> row for every entry of each page's "records" array.
        /// </summary>
        /// <param name="info">Company being processed; its Id, CorpCode and Url are used.</param>
        /// <param name="param">Value posted as the "param" form field.</param>
        /// <param name="corpType">Value posted as the "corpType" form field.</param>
        protected void AddCorpPunish(CorpInfo info, string param, string corpType)
        {
            string url = "http://portal.szjs.gov.cn:8888/publicShow/queryPunish.html";

            string[]             postParams = new string[] { "param", "corpType", "orgCode", "page" };
            JavaScriptSerializer java       = new JavaScriptSerializer();
            int pageInt = 1;

            for (int i = 1; i <= pageInt; i++)
            {
                string[]            postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
                NameValueCollection nvc        = ToolWeb.GetNameValueCollection(postParams, postValues);
                string html = string.Empty;

                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                }
                catch
                {
                    // First attempt failed: wait 12 minutes, then retry once.
                    Thread.Sleep(12 * 60 * 1000);
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // Retry failed too: wait 8 more minutes and give up on this page.
                        // If this is page 1, pageInt is still 1, so the loop (and the method) ends here.
                        Thread.Sleep(8 * 60 * 1000);
                        continue;
                    }
                }

                if (string.IsNullOrEmpty(html))
                {
                    continue;
                }

                // The deserializer may hand back null or a non-object value for an empty or
                // unexpected payload; such a page has nothing to read.
                Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
                if (jsonResults == null)
                {
                    continue;
                }

                if (i == 1)
                {
                    // The page count is taken from the first page only; it stays 1 when absent or not an int.
                    object totalPage;
                    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
                    {
                        pageInt = (int)totalPage;
                    }
                }

                object recordsValue;
                jsonResults.TryGetValue("records", out recordsValue);
                object[] dicRecords = recordsValue as object[];
                if (dicRecords == null)
                {
                    continue;
                }

                foreach (object dicRecord in dicRecords)
                {
                    // Skip entries that are not JSON objects.
                    Dictionary<string, object> dic = dicRecord as Dictionary<string, object>;
                    if (dic == null)
                    {
                        continue;
                    }

                    string DocNo      = Convert.ToString(dic["file_id"]);
                    string PunishType = Convert.ToString(dic["pun_type_text"]);
                    string GrantUnit  = Convert.ToString(dic["file_org"]);
                    string DocDate    = Convert.ToString(dic["file_date"]);
                    string PunishCtx  = string.Empty; // no penalty text is read from the record

                    CorpPunish punish = ToolDb.GenCorpPunish(info.Id, DocNo, PunishType, GrantUnit, DocDate, PunishCtx, info.Url, "0");

                    ToolDb.SaveEntity(punish, string.Empty);
                }
            }
        }
示例#29
0
        /// <summary>
        /// 获取办公地址并保存分支机构信息
        /// </summary>
        /// <param name="html"></param>
        /// <param name="url"></param>
        /// <returns></returns>
        private string GetOffAddress(string html, string url, CorpInfo info)
        {
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            string cookiestr       = string.Empty;

            viewState       = ToolWeb.GetAspNetViewState(html);
            eventValidation = ToolWeb.GetAspNetEventValidation(html);
            string returnValue      = string.Empty;
            int    pageInt          = 1;
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                "ctl00$MainContent$ScriptManager1",
                "__EVENTTARGET",
                "__EVENTARGUMENT",
                "__VIEWSTATE",
                "__EVENTVALIDATION",
                "__ASYNCPOST"
            }, new string[] {
                "ctl00$MainContent$UpdatePanel1|ctl00$MainContent$step2",
                "ctl00$MainContent$step2",
                "",
                viewState,
                eventValidation,
                "true"
            });
            string htmldtl = string.Empty;

            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch { }
            Parser   parser  = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));

            if (dtlNode != null && dtlNode.Count > 0)
            {
                TableTag table = dtlNode[0] as TableTag;
                for (int i = 1; i < table.RowCount; i++)
                {
                    string   CorpName = string.Empty, CorpCode = string.Empty, Location = string.Empty, DtlAddress = string.Empty, PostalCode = string.Empty, ResInstitution = string.Empty, LinkMan = string.Empty, LinPhone = string.Empty, Fax = string.Empty, BusinessCode = string.Empty, RegDate = string.Empty, Email = string.Empty, SafetyCode = string.Empty, TotalReMan = string.Empty, TechReMan = string.Empty, SafeReMan = string.Empty, QualityReMan = string.Empty, Url = string.Empty, TotalSafetyCode = string.Empty, TechSafetyCode = string.Empty, QualitySafetyCode = string.Empty;
                    TableRow tr = table.Rows[i];
                    Url          = "http://113.108.219.40/intogd/Open/" + tr.Columns[0].GetATagHref();
                    CorpName     = tr.Columns[0].ToNodePlainString();
                    TotalReMan   = tr.Columns[2].ToNodePlainString();
                    TechReMan    = tr.Columns[3].ToNodePlainString();
                    QualityReMan = tr.Columns[4].ToNodePlainString();
                    SafeReMan    = tr.Columns[5].ToNodePlainString();
                    string dtlHtml = string.Empty;
                    try
                    {
                        dtlHtml = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8);
                    }
                    catch { continue; }
                    parser = new Parser(new Lexer(dtlHtml));
                    NodeList staffNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                    if (staffNode != null && staffNode.Count > 1)
                    {
                        string   ctx      = string.Empty;
                        TableTag dtlTable = staffNode[1] as TableTag;
                        for (int k = 0; k < dtlTable.RowCount; k++)
                        {
                            for (int d = 0; d < dtlTable.Rows[k].ColumnCount; d++)
                            {
                                TableColumn col = dtlTable.Rows[k].Columns[d];
                                if (col.GetAttribute("class") == "td-left")
                                {
                                    ctx += col.ToNodePlainString() + ":";
                                }
                                else
                                {
                                    ctx += col.ToNodePlainString() + "\r\n";
                                }
                            }
                        }
                        if (string.IsNullOrEmpty(returnValue))
                        {
                            returnValue = ctx.GetRegex("详细地址");
                        }

                        CorpCode       = ctx.GetRegex("组织机构代码");
                        Location       = ctx.GetRegex("所在地");
                        DtlAddress     = ctx.GetRegex("详细地址");
                        PostalCode     = ctx.GetRegex("邮政编码");
                        ResInstitution = ctx.GetRegex("驻粤负责机构");
                        LinkMan        = ctx.Replace(" ", "").GetRegex("联系人");
                        Fax            = ctx.GetRegex("传真号码");
                        LinPhone       = ctx.GetRegex("联系电话");
                        BusinessCode   = ctx.GetRegex("营业执照注册号").Replace("分", "");
                        RegDate        = ctx.GetRegex("设立时间").GetDateRegex();
                        Email          = ctx.GetRegex("邮箱");
                        parser.Reset();
                        NodeList safeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table")));
                        if (safeNode != null && safeNode.Count > 2)
                        {
                            TableTag safeTable          = safeNode[2] as TableTag;
                            string   TotalSafetyCodeCtx = string.Empty;
                            string   TechSafetyCodeCtx  = string.Empty;
                            for (int k = 0; k < safeTable.RowCount; k++)
                            {
                                for (int d = 0; d < safeTable.Rows[k].ColumnCount; d++)
                                {
                                    TableColumn col = safeTable.Rows[k].Columns[d];
                                    if (d >= 2)
                                    {
                                        if (col.GetAttribute("class") == "td-left")
                                        {
                                            TechSafetyCodeCtx += col.ToNodePlainString() + ":";
                                        }
                                        else
                                        {
                                            TechSafetyCodeCtx += col.ToNodePlainString() + "\r\n";
                                        }
                                    }
                                    else
                                    {
                                        if (col.GetAttribute("class") == "td-left")
                                        {
                                            TotalSafetyCodeCtx += col.ToNodePlainString() + ":";
                                        }
                                        else
                                        {
                                            TotalSafetyCodeCtx += col.ToNodePlainString() + "\r\n";
                                        }
                                    }
                                }
                            }
                            TotalSafetyCode = ToolHtml.GetRegexStringNot(TotalSafetyCodeCtx, new string[] { "安全生产考核合格证号(A证)" });
                            TechSafetyCode  = ToolHtml.GetRegexStringNot(TechSafetyCodeCtx, new string[] { "安全生产考核合格证号(A证)" });
                        }

                        if (safeNode != null && safeNode.Count > 4)
                        {
                            TableTag safeTable            = safeNode[4] as TableTag;
                            string   SafetyCodeCtx        = string.Empty;
                            string   QualitySafetyCodeCtx = string.Empty;
                            for (int k = 0; k < safeTable.RowCount; k++)
                            {
                                for (int d = 0; d < safeTable.Rows[k].ColumnCount; d++)
                                {
                                    TableColumn col = safeTable.Rows[k].Columns[d];
                                    if (d >= 2)
                                    {
                                        if (col.GetAttribute("class") == "td-left")
                                        {
                                            QualitySafetyCodeCtx += col.ToNodePlainString() + ":";
                                        }
                                        else
                                        {
                                            QualitySafetyCodeCtx += col.ToNodePlainString() + "\r\n";
                                        }
                                    }
                                    else
                                    {
                                        if (col.GetAttribute("class") == "td-left")
                                        {
                                            SafetyCodeCtx += col.ToNodePlainString() + ":";
                                        }
                                        else
                                        {
                                            SafetyCodeCtx += col.ToNodePlainString() + "\r\n";
                                        }
                                    }
                                }
                            }
                            SafetyCode        = ToolHtml.GetRegexStringNot(SafetyCodeCtx, new string[] { "安全生产考核合格证号(A或B证)" });
                            QualitySafetyCode = ToolHtml.GetRegexStringNot(QualitySafetyCodeCtx, new string[] { "安全生产考核合格证号" });// QualitySafetyCodeCtx.GetRegex("安全生产考核合格证号");
                        }

                        CorpInstitution entity = ToolDb.GenCorpInstitution("广东省", "广东地区", info.Id, CorpName, CorpCode, Location, DtlAddress, PostalCode, ResInstitution, LinkMan, LinPhone, Fax, BusinessCode, RegDate, Email, SafetyCode, TotalReMan, TechReMan, SafeReMan, QualityReMan, Url, TotalSafetyCode, TechSafetyCode, QualitySafetyCode);

                        ToolDb.SaveEntity(entity, string.Empty);
                    }
                }
            }
            return(returnValue);
        }
示例#30
0
        /// <summary>
        /// Queries the safety-production licence endpoint for a company and saves one
        /// <c>CorpSecLic</c> entity per record returned.
        /// </summary>
        /// <remarks>
        /// The endpoint is paged. Page 1 is requested first; its <c>totalPage</c> value
        /// (1 when absent or not an integer) decides how many further pages are requested.
        /// A page that cannot be downloaded after one retry, or whose body does not parse
        /// into a JSON object with a <c>records</c> array, is skipped. If that happens on
        /// page 1 nothing is saved and the method returns.
        /// </remarks>
        /// <param name="info">
        /// Company being processed. Its <c>CorpCode</c> is posted as the <c>orgCode</c> form
        /// field; its <c>Id</c> and <c>Url</c> are stored on every saved licence.
        /// </param>
        /// <param name="param">Value posted unchanged as the <c>param</c> form field.</param>
        /// <param name="corpType">Value posted unchanged as the <c>corpType</c> form field.</param>
        protected void AddCorpSecLic(CorpInfo info, string param, string corpType)
        {
            string url = "http://portal.szjs.gov.cn:8888/publicShow/querySafeProduction.html";
            string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
            JavaScriptSerializer serializer = new JavaScriptSerializer();

            // Starts at 1 and is raised from the first page's "totalPage" value, so a
            // failure on page 1 (which only "continue"s) still terminates the loop.
            int pageCount = 1;

            for (int page = 1; page <= pageCount; page++)
            {
                string[] postValues = new string[] { param, corpType, info.CorpCode, page.ToString() };
                NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);

                string html;
                if (!TryFetchSecLicPage(url, nvc, out html))
                {
                    continue;
                }

                Dictionary<string, object> jsonResults = ParseSecLicPage(serializer, html);
                if (jsonResults == null)
                {
                    // Empty body or a payload that is not a JSON object: nothing to read.
                    continue;
                }

                if (page == 1)
                {
                    object totalPage;
                    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
                    {
                        pageCount = (int)totalPage;
                    }
                }

                object recordsValue;
                jsonResults.TryGetValue("records", out recordsValue);
                object[] records = recordsValue as object[];
                if (records == null)
                {
                    continue;
                }

                foreach (object record in records)
                {
                    Dictionary<string, object> dic = record as Dictionary<string, object>;
                    if (dic == null)
                    {
                        continue;
                    }

                    string secLicCode     = GetSecLicField(dic, "lics_id");
                    string secLicDesc     = GetSecLicField(dic, "lics_range");
                    string validStartDate = GetSecLicField(dic, "valid_start_date");
                    string validEndDate   = GetSecLicField(dic, "valid_end_date");
                    string secLicUnit     = GetSecLicField(dic, "issue_dept");

                    // Descriptions longer than 1000 bytes (system default encoding) are dropped
                    // entirely rather than truncated.
                    // NOTE(review): presumably a storage column limit - confirm.
                    if (Encoding.Default.GetByteCount(secLicDesc) > 1000)
                    {
                        secLicDesc = string.Empty;
                    }

                    CorpSecLic seclic = ToolDb.GenCorpSecLic(info.Id, secLicCode, secLicDesc, validStartDate, validEndDate, secLicUnit, info.Url);
                    ToolDb.SaveEntity(seclic, string.Empty);
                }
            }
        }

        /// <summary>
        /// Posts one page request. If the first attempt throws, waits 12 minutes and tries
        /// once more; if that also throws, waits a further 8 minutes and reports failure.
        /// </summary>
        /// <param name="url">Endpoint to post to.</param>
        /// <param name="nvc">Form fields for the request.</param>
        /// <param name="html">Response body on success; <c>string.Empty</c> on failure.</param>
        /// <returns><c>true</c> when either attempt returned without throwing.</returns>
        private bool TryFetchSecLicPage(string url, NameValueCollection nvc, out string html)
        {
            html = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                return true;
            }
            catch
            {
                // Best-effort scraper: any failure is answered with a long pause and one retry.
                Thread.Sleep(12 * 60 * 1000);
            }

            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                return true;
            }
            catch
            {
                Thread.Sleep(8 * 60 * 1000);
                return false;
            }
        }

        /// <summary>
        /// Deserializes a response body and returns it as a string-keyed dictionary, or
        /// <c>null</c> when the body is empty, is not valid JSON, or is not a JSON object.
        /// </summary>
        private Dictionary<string, object> ParseSecLicPage(JavaScriptSerializer serializer, string html)
        {
            try
            {
                return serializer.DeserializeObject(html) as Dictionary<string, object>;
            }
            catch (ArgumentException)
            {
                // Invalid JSON, null input, or input longer than MaxJsonLength.
                return null;
            }
            catch (InvalidOperationException)
            {
                // Nesting deeper than the serializer's RecursionLimit.
                return null;
            }
        }

        /// <summary>
        /// Returns the string form of a record field, or <c>string.Empty</c> when the field
        /// is missing or null.
        /// </summary>
        private string GetSecLicField(Dictionary<string, object> record, string key)
        {
            object value;
            return record.TryGetValue(key, out value) ? Convert.ToString(value) : string.Empty;
        }