Пример #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, turns every table row that has a
        /// <c>class</c> attribute into a province, city, county or town entity, hands the entity to
        /// <c>addressLogic</c>, and recurses into the child page each row links to (town rows are leaves).
        /// </summary>
        /// <param name="url">Address of the page to download and parse.</param>
        /// <returns>
        /// The object stored in the cache under the key "address", cast to a string list. The list is
        /// created and cached on first use; this method itself never adds items to it.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The download helper reported "err" twice in a row for <paramref name="url"/>.
        /// </exception>
        private static List <string> GetHtml(string url)
        {
            var obj = CacheHelper.Get("address");

            if (obj == null)
            {
                // Seed the cache only when it is empty, so recursive calls do not allocate a list each time.
                List<string> seeded = new List<string>();
                CacheHelper.AddPermanent("address", seeded);
                obj = seeded;
            }

            string html = HttpHelper.Get(url);

            if (html == "err")
            {
                // The helper signals failure with the literal "err"; wait a second and retry once.
                System.Threading.Thread.Sleep(1000);
                html = HttpHelper.Get(url);
                if (html == "err")
                {
                    throw new InvalidOperationException("Failed to download '" + url + "' after one retry.");
                }
            }

            if (url.IndexOf("Html/Address", StringComparison.Ordinal) > -1)
            {
                // Child links are resolved against the remote site rather than the "Html/Address" location.
                url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016" + url.Substring(url.LastIndexOf('/'));
            }

            // Everything below is invariant for the whole page, so build it once instead of once per row.
            Regex  rowRegex     = new Regex("<tr class='(.*?)'>(.*?)</tr>", RegexOptions.IgnoreCase);
            Regex  linkRegex    = new Regex("<a href='(.*?)'>(.*?)</a>");
            string baseUrl      = url.Substring(0, url.LastIndexOf('/') + 1);
            string provinceType = EnumEntity.AddressType.provincetr.ToString();
            string cityType     = EnumEntity.AddressType.citytr.ToString();
            string countyType   = EnumEntity.AddressType.countytr.ToString();
            string townType     = EnumEntity.AddressType.towntr.ToString();

            // Code of the entity this page belongs to; derived from the page file name on first use.
            string parentCode = null;

            foreach (Match row in rowRegex.Matches(html))
            {
                MatchCollection links = linkRegex.Matches(row.Value);

                if (row.Value.IndexOf(provinceType, StringComparison.Ordinal) > -1)
                {
                    // A province row holds several links, one per province.
                    foreach (Match link in links)
                    {
                        string href = link.Groups[1].Value;

                        Address_Province province = new Entity.Address_Province();
                        province.ProvinceCode = ExtractPaddedCode(href);
                        // Province names are followed by a <br/> inside the anchor; strip it.
                        province.ProvinceName = link.Groups[2].Value.Replace("<br/>", "");
                        addressLogic.AddDbProvince(province);

                        // The first two characters of the href are used as the province page name.
                        GetHtml(baseUrl + href.Substring(0, 2) + ".html");
                        int saved = addressLogic.SaveDbAddress();
                        Log4net.LogHelper.WriteLog("地址采集入库", url + ";共计:" + saved.ToString());
                    }
                    continue;
                }

                // Other rows need two links: the first carries the code, the second the name.
                // Rows with fewer links are not stored.
                if (links.Count < 2)
                {
                    continue;
                }

                if (parentCode == null)
                {
                    parentCode = ExtractPaddedCode(url);
                }

                string type      = row.Groups[1].Value;
                string childHref = links[0].Groups[1].Value;
                string code      = links[0].Groups[2].Value;
                string name      = links[1].Groups[2].Value;

                if (type == cityType)
                {
                    Address_City city = new Entity.Address_City();
                    city.CityCode     = code;
                    city.CityName     = name;
                    city.ProvinceCode = parentCode;
                    addressLogic.AddDbCity(city);
                }
                else if (type == countyType)
                {
                    Address_County county = new Entity.Address_County();
                    county.CountyCode = code;
                    county.CountyName = name;
                    county.CityCode   = parentCode;
                    addressLogic.AddDbCounty(county);
                }
                else if (type == townType)
                {
                    Address_Town town = new Entity.Address_Town();
                    town.TownCode   = code;
                    town.TownName   = name;
                    town.CountyCode = parentCode;
                    addressLogic.AddDbTown(town);
                }

                // Town pages are not followed; every other row type is crawled one level deeper.
                if (type != townType && !string.IsNullOrWhiteSpace(childHref))
                {
                    GetHtml(baseUrl + childHref);
                }

                int result = addressLogic.SaveDbAddress();
                Log4net.LogHelper.WriteLog("地址采集入库:" + code + ";" + name, url + ";共计:" + result.ToString());
            }

            return obj as List<string>;
        }

        /// <summary>
        /// Takes the text after the last '/' in <paramref name="path"/>, drops its final five characters
        /// (the length of ".html") and right-pads what is left with '0' to a width of 12.
        /// </summary>
        /// <param name="path">A URL or relative href whose last segment is at least five characters long.</param>
        /// <returns>The padded code string.</returns>
        private static string ExtractPaddedCode(string path)
        {
            int start = path.LastIndexOf('/') + 1;
            return path.Substring(start, path.Length - start - 5).PadRight(12, '0');
        }